faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -11,42 +11,46 @@
11
11
  * implementation of Hyper-parameter auto-tuning
12
12
  */
13
13
 
14
+ #include <faiss/index_factory.h>
15
+
14
16
  #include <faiss/AutoTune.h>
15
17
 
16
18
  #include <cinttypes>
17
19
  #include <cmath>
18
20
 
21
+ #include <regex>
22
+
19
23
  #include <faiss/impl/FaissAssert.h>
20
- #include <faiss/utils/utils.h>
21
24
  #include <faiss/utils/random.h>
25
+ #include <faiss/utils/utils.h>
22
26
 
27
+ #include <faiss/Index2Layer.h>
23
28
  #include <faiss/IndexFlat.h>
24
- #include <faiss/VectorTransform.h>
25
- #include <faiss/IndexPreTransform.h>
26
- #include <faiss/IndexLSH.h>
27
- #include <faiss/IndexPQ.h>
29
+ #include <faiss/IndexHNSW.h>
28
30
  #include <faiss/IndexIVF.h>
31
+ #include <faiss/IndexIVFFlat.h>
29
32
  #include <faiss/IndexIVFPQ.h>
33
+ #include <faiss/IndexIVFPQFastScan.h>
30
34
  #include <faiss/IndexIVFPQR.h>
31
- #include <faiss/Index2Layer.h>
32
- #include <faiss/IndexIVFFlat.h>
33
- #include <faiss/MetaIndexes.h>
34
- #include <faiss/IndexScalarQuantizer.h>
35
- #include <faiss/IndexHNSW.h>
35
+ #include <faiss/IndexLSH.h>
36
36
  #include <faiss/IndexLattice.h>
37
+ #include <faiss/IndexNSG.h>
38
+ #include <faiss/IndexPQ.h>
37
39
  #include <faiss/IndexPQFastScan.h>
38
- #include <faiss/IndexIVFPQFastScan.h>
40
+ #include <faiss/IndexPreTransform.h>
39
41
  #include <faiss/IndexRefine.h>
40
-
42
+ #include <faiss/IndexResidual.h>
43
+ #include <faiss/IndexScalarQuantizer.h>
44
+ #include <faiss/MetaIndexes.h>
45
+ #include <faiss/VectorTransform.h>
41
46
 
42
47
  #include <faiss/IndexBinaryFlat.h>
43
48
  #include <faiss/IndexBinaryHNSW.h>
44
- #include <faiss/IndexBinaryIVF.h>
45
49
  #include <faiss/IndexBinaryHash.h>
50
+ #include <faiss/IndexBinaryIVF.h>
46
51
 
47
52
  namespace faiss {
48
53
 
49
-
50
54
  /***************************************************************
51
55
  * index_factory
52
56
  ***************************************************************/
@@ -54,42 +58,48 @@ namespace faiss {
54
58
  namespace {
55
59
 
56
60
  struct VTChain {
57
- std::vector<VectorTransform *> chain;
58
- ~VTChain () {
61
+ std::vector<VectorTransform*> chain;
62
+ ~VTChain() {
59
63
  for (int i = 0; i < chain.size(); i++) {
60
64
  delete chain[i];
61
65
  }
62
66
  }
63
67
  };
64
68
 
65
-
66
69
  /// what kind of training does this coarse quantizer require?
67
- char get_trains_alone(const Index *coarse_quantizer) {
68
- return
69
- dynamic_cast<const IndexFlat*>(coarse_quantizer) ? 0 :
70
- // multi index just needs to be quantized
71
- dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ? 1 :
72
- dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
73
- 2; // for complicated indexes, we assume they can't be used as a kmeans index
70
+ char get_trains_alone(const Index* coarse_quantizer) {
71
+ if (dynamic_cast<const IndexFlat*>(coarse_quantizer)) {
72
+ return 0;
73
+ }
74
+ // multi index just needs to be quantized
75
+ if (dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ||
76
+ dynamic_cast<const ResidualCoarseQuantizer*>(coarse_quantizer)) {
77
+ return 1;
78
+ }
79
+ if (dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer)) {
80
+ return 2;
81
+ }
82
+ return 2; // for complicated indexes, we assume they can't be used as a
83
+ // kmeans index
74
84
  }
75
85
 
76
- bool str_ends_with(const std::string& s, const std::string& suffix)
77
- {
78
- return s.rfind(suffix) == std::abs(int(s.size()-suffix.size()));
86
+ bool str_ends_with(const std::string& s, const std::string& suffix) {
87
+ return s.rfind(suffix) == std::abs(int(s.size() - suffix.size()));
79
88
  }
80
89
 
81
90
  // check if ends with suffix followed by digits
82
- bool str_ends_with_digits(const std::string& s, const std::string& suffix)
83
- {
91
+ bool str_ends_with_digits(const std::string& s, const std::string& suffix) {
84
92
  int i;
85
- for(i = s.length() - 1; i >= 0; i--) {
86
- if (!isdigit(s[i])) break;
93
+ for (i = s.length() - 1; i >= 0; i--) {
94
+ if (!isdigit(s[i]))
95
+ break;
87
96
  }
88
97
  return str_ends_with(s.substr(0, i + 1), suffix);
89
98
  }
90
99
 
91
- void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
100
+ void find_matching_parentheses(const std::string& s, int& i0, int& i1) {
92
101
  int st = 0;
102
+ i0 = i1 = 0;
93
103
  for (int i = 0; i < s.length(); i++) {
94
104
  if (s[i] == '(') {
95
105
  if (st == 0) {
@@ -105,32 +115,30 @@ void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
105
115
  return;
106
116
  }
107
117
  if (st < 0) {
108
- FAISS_THROW_FMT("factory string %s: unbalanced parentheses", s.c_str());
118
+ FAISS_THROW_FMT(
119
+ "factory string %s: unbalanced parentheses", s.c_str());
109
120
  }
110
121
  }
111
-
112
122
  }
113
- FAISS_THROW_FMT("factory string %s: unbalanced parentheses st=%d", s.c_str(), st);
114
-
123
+ FAISS_THROW_FMT(
124
+ "factory string %s: unbalanced parentheses st=%d", s.c_str(), st);
115
125
  }
116
126
 
117
127
  } // anonymous namespace
118
128
 
119
- Index *index_factory (int d, const char *description_in, MetricType metric)
120
- {
121
- FAISS_THROW_IF_NOT(metric == METRIC_L2 ||
122
- metric == METRIC_INNER_PRODUCT);
129
+ Index* index_factory(int d, const char* description_in, MetricType metric) {
130
+ FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
123
131
  VTChain vts;
124
- Index *coarse_quantizer = nullptr;
132
+ Index* coarse_quantizer = nullptr;
125
133
  std::string parenthesis_ivf, parenthesis_refine;
126
- Index *index = nullptr;
134
+ Index* index = nullptr;
127
135
  bool add_idmap = false;
128
136
  int d_in = d;
129
137
 
130
138
  ScopeDeleter1<Index> del_coarse_quantizer, del_index;
131
139
 
132
140
  std::string description(description_in);
133
- char *ptr;
141
+ char* ptr;
134
142
 
135
143
  // handle indexes in parentheses
136
144
  while (description.find('(') != std::string::npos) {
@@ -153,11 +161,11 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
153
161
  int64_t ncentroids = -1;
154
162
  bool use_2layer = false;
155
163
  int hnsw_M = -1;
164
+ int nsg_R = -1;
156
165
 
157
- for (char *tok = strtok_r (&description[0], " ,", &ptr);
158
- tok;
159
- tok = strtok_r (nullptr, " ,", &ptr)) {
160
- int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
166
+ for (char* tok = strtok_r(&description[0], " ,", &ptr); tok;
167
+ tok = strtok_r(nullptr, " ,", &ptr)) {
168
+ int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2, R;
161
169
  std::string stok(tok);
162
170
  nbit = 8;
163
171
  int bbs = -1;
@@ -166,234 +174,276 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
166
174
  // to avoid mem leaks with exceptions:
167
175
  // do all tests before any instanciation
168
176
 
169
- VectorTransform *vt_1 = nullptr;
170
- Index *coarse_quantizer_1 = nullptr;
171
- Index *index_1 = nullptr;
177
+ VectorTransform* vt_1 = nullptr;
178
+ Index* coarse_quantizer_1 = nullptr;
179
+ Index* index_1 = nullptr;
172
180
 
173
181
  // VectorTransforms
174
- if (sscanf (tok, "PCA%d", &d_out) == 1) {
175
- vt_1 = new PCAMatrix (d, d_out);
182
+ if (sscanf(tok, "PCA%d", &d_out) == 1) {
183
+ vt_1 = new PCAMatrix(d, d_out);
176
184
  d = d_out;
177
- } else if (sscanf (tok, "PCAR%d", &d_out) == 1) {
178
- vt_1 = new PCAMatrix (d, d_out, 0, true);
185
+ } else if (sscanf(tok, "PCAR%d", &d_out) == 1) {
186
+ vt_1 = new PCAMatrix(d, d_out, 0, true);
179
187
  d = d_out;
180
- } else if (sscanf (tok, "RR%d", &d_out) == 1) {
181
- vt_1 = new RandomRotationMatrix (d, d_out);
188
+ } else if (sscanf(tok, "RR%d", &d_out) == 1) {
189
+ vt_1 = new RandomRotationMatrix(d, d_out);
182
190
  d = d_out;
183
- } else if (sscanf (tok, "PCAW%d", &d_out) == 1) {
184
- vt_1 = new PCAMatrix (d, d_out, -0.5, false);
191
+ } else if (sscanf(tok, "PCAW%d", &d_out) == 1) {
192
+ vt_1 = new PCAMatrix(d, d_out, -0.5, false);
185
193
  d = d_out;
186
- } else if (sscanf (tok, "PCAWR%d", &d_out) == 1) {
187
- vt_1 = new PCAMatrix (d, d_out, -0.5, true);
194
+ } else if (sscanf(tok, "PCAWR%d", &d_out) == 1) {
195
+ vt_1 = new PCAMatrix(d, d_out, -0.5, true);
188
196
  d = d_out;
189
- } else if (sscanf (tok, "OPQ%d_%d", &opq_M, &d_out) == 2) {
190
- vt_1 = new OPQMatrix (d, opq_M, d_out);
197
+ } else if (sscanf(tok, "OPQ%d_%d", &opq_M, &d_out) == 2) {
198
+ vt_1 = new OPQMatrix(d, opq_M, d_out);
191
199
  d = d_out;
192
- } else if (sscanf (tok, "OPQ%d", &opq_M) == 1) {
193
- vt_1 = new OPQMatrix (d, opq_M);
194
- } else if (sscanf (tok, "ITQ%d", &d_out) == 1) {
195
- vt_1 = new ITQTransform (d, d_out, true);
200
+ } else if (sscanf(tok, "OPQ%d", &opq_M) == 1) {
201
+ vt_1 = new OPQMatrix(d, opq_M);
202
+ } else if (sscanf(tok, "ITQ%d", &d_out) == 1) {
203
+ vt_1 = new ITQTransform(d, d_out, true);
196
204
  d = d_out;
197
205
  } else if (stok == "ITQ") {
198
- vt_1 = new ITQTransform (d, d, false);
199
- } else if (sscanf (tok, "Pad%d", &d_out) == 1) {
206
+ vt_1 = new ITQTransform(d, d, false);
207
+ } else if (sscanf(tok, "Pad%d", &d_out) == 1) {
200
208
  if (d_out > d) {
201
- vt_1 = new RemapDimensionsTransform (d, d_out, false);
209
+ vt_1 = new RemapDimensionsTransform(d, d_out, false);
202
210
  d = d_out;
203
211
  }
204
212
  } else if (stok == "L2norm") {
205
- vt_1 = new NormalizationTransform (d, 2.0);
206
-
207
- // coarse quantizers
208
- } else if (!coarse_quantizer &&
209
- sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
210
- coarse_quantizer_1 = new IndexHNSWFlat (d, M, metric);
211
-
212
- } else if (!coarse_quantizer &&
213
- sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) {
213
+ vt_1 = new NormalizationTransform(d, 2.0);
214
+
215
+ // coarse quantizers
216
+ } else if (
217
+ !coarse_quantizer &&
218
+ sscanf(tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
219
+ coarse_quantizer_1 = new IndexHNSWFlat(d, M, metric);
220
+
221
+ } else if (
222
+ !coarse_quantizer &&
223
+ sscanf(tok, "IVF%" PRId64 "_NSG%d", &ncentroids, &R) == 2) {
224
+ coarse_quantizer_1 = new IndexNSGFlat(d, R, metric);
225
+
226
+ } else if (
227
+ !coarse_quantizer &&
228
+ sscanf(tok, "IVF%" PRId64, &ncentroids) == 1) {
214
229
  if (!parenthesis_ivf.empty()) {
215
230
  coarse_quantizer_1 =
216
- index_factory(d, parenthesis_ivf.c_str(), metric);
217
-
231
+ index_factory(d, parenthesis_ivf.c_str(), metric);
218
232
  } else if (metric == METRIC_L2) {
219
- coarse_quantizer_1 = new IndexFlatL2 (d);
233
+ coarse_quantizer_1 = new IndexFlatL2(d);
220
234
  } else {
221
- coarse_quantizer_1 = new IndexFlatIP (d);
235
+ coarse_quantizer_1 = new IndexFlatIP(d);
222
236
  }
223
237
 
224
- } else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) {
225
- FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
226
- "MultiIndex not implemented for inner prod search");
227
- coarse_quantizer_1 = new MultiIndexQuantizer (d, 2, nbit);
238
+ } else if (!coarse_quantizer && sscanf(tok, "IMI2x%d", &nbit) == 1) {
239
+ FAISS_THROW_IF_NOT_MSG(
240
+ metric == METRIC_L2,
241
+ "MultiIndex not implemented for inner prod search");
242
+ coarse_quantizer_1 = new MultiIndexQuantizer(d, 2, nbit);
228
243
  ncentroids = 1 << (2 * nbit);
229
244
 
230
- } else if (!coarse_quantizer &&
231
- sscanf (tok, "Residual%dx%d", &M, &nbit) == 2) {
232
- FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
233
- "MultiIndex not implemented for inner prod search");
234
- coarse_quantizer_1 = new MultiIndexQuantizer (d, M, nbit);
245
+ } else if (
246
+ !coarse_quantizer &&
247
+ sscanf(tok, "Residual%dx%d", &M, &nbit) == 2) {
248
+ FAISS_THROW_IF_NOT_MSG(
249
+ metric == METRIC_L2,
250
+ "MultiIndex not implemented for inner prod search");
251
+ coarse_quantizer_1 = new MultiIndexQuantizer(d, M, nbit);
235
252
  ncentroids = int64_t(1) << (M * nbit);
236
253
  use_2layer = true;
237
254
 
238
- } else if (!coarse_quantizer &&
239
- sscanf (tok, "Residual%" PRId64, &ncentroids) == 1) {
240
- coarse_quantizer_1 = new IndexFlatL2 (d);
255
+ } else if (std::regex_match(
256
+ stok,
257
+ std::regex(
258
+ "(RQ|RCQ)[0-9]+x[0-9]+(_[0-9]+x[0-9]+)*"))) {
259
+ std::vector<size_t> nbits;
260
+ std::smatch sm;
261
+ bool is_RCQ = stok.find("RCQ") == 0;
262
+ while (std::regex_search(
263
+ stok, sm, std::regex("([0-9]+)x([0-9]+)"))) {
264
+ int M = std::stoi(sm[1].str());
265
+ int nbit = std::stoi(sm[2].str());
266
+ nbits.resize(nbits.size() + M, nbit);
267
+ stok = sm.suffix();
268
+ }
269
+ if (!is_RCQ) {
270
+ index_1 = new IndexResidual(d, nbits, metric);
271
+ } else {
272
+ index_1 = new ResidualCoarseQuantizer(d, nbits, metric);
273
+ }
274
+ } else if (
275
+ !coarse_quantizer &&
276
+ sscanf(tok, "Residual%" PRId64, &ncentroids) == 1) {
277
+ coarse_quantizer_1 = new IndexFlatL2(d);
241
278
  use_2layer = true;
242
279
 
243
280
  } else if (stok == "IDMap") {
244
281
  add_idmap = true;
245
282
 
246
- // IVFs
283
+ // IVFs
247
284
  } else if (!index && (stok == "Flat" || stok == "FlatDedup")) {
248
285
  if (coarse_quantizer) {
249
286
  // if there was an IVF in front, then it is an IVFFlat
250
- IndexIVF *index_ivf = stok == "Flat" ?
251
- new IndexIVFFlat (
252
- coarse_quantizer, d, ncentroids, metric) :
253
- new IndexIVFFlatDedup (
254
- coarse_quantizer, d, ncentroids, metric);
287
+ IndexIVF* index_ivf = stok == "Flat"
288
+ ? new IndexIVFFlat(
289
+ coarse_quantizer, d, ncentroids, metric)
290
+ : new IndexIVFFlatDedup(
291
+ coarse_quantizer, d, ncentroids, metric);
255
292
  index_ivf->quantizer_trains_alone =
256
- get_trains_alone (coarse_quantizer);
293
+ get_trains_alone(coarse_quantizer);
257
294
  index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
258
- del_coarse_quantizer.release ();
295
+ del_coarse_quantizer.release();
259
296
  index_ivf->own_fields = true;
260
297
  index_1 = index_ivf;
261
298
  } else if (hnsw_M > 0) {
262
- index_1 = new IndexHNSWFlat (d, hnsw_M, metric);
299
+ index_1 = new IndexHNSWFlat(d, hnsw_M, metric);
300
+ } else if (nsg_R > 0) {
301
+ index_1 = new IndexNSGFlat(d, nsg_R, metric);
263
302
  } else {
264
- FAISS_THROW_IF_NOT_MSG (stok != "FlatDedup",
265
- "dedup supported only for IVFFlat");
266
- index_1 = new IndexFlat (d, metric);
303
+ FAISS_THROW_IF_NOT_MSG(
304
+ stok != "FlatDedup",
305
+ "dedup supported only for IVFFlat");
306
+ index_1 = new IndexFlat(d, metric);
267
307
  }
268
- } else if (!index && (stok == "SQ8" || stok == "SQ4" || stok == "SQ6" ||
269
- stok == "SQfp16")) {
270
- ScalarQuantizer::QuantizerType qt =
271
- stok == "SQ8" ? ScalarQuantizer::QT_8bit :
272
- stok == "SQ6" ? ScalarQuantizer::QT_6bit :
273
- stok == "SQ4" ? ScalarQuantizer::QT_4bit :
274
- stok == "SQfp16" ? ScalarQuantizer::QT_fp16 :
275
- ScalarQuantizer::QT_4bit;
308
+ } else if (
309
+ !index &&
310
+ (stok == "SQ8" || stok == "SQ4" || stok == "SQ6" ||
311
+ stok == "SQfp16")) {
312
+ ScalarQuantizer::QuantizerType qt = stok == "SQ8"
313
+ ? ScalarQuantizer::QT_8bit
314
+ : stok == "SQ6" ? ScalarQuantizer::QT_6bit
315
+ : stok == "SQ4" ? ScalarQuantizer::QT_4bit
316
+ : stok == "SQfp16" ? ScalarQuantizer::QT_fp16
317
+ : ScalarQuantizer::QT_4bit;
276
318
  if (coarse_quantizer) {
277
- FAISS_THROW_IF_NOT (!use_2layer);
278
- IndexIVFScalarQuantizer *index_ivf =
279
- new IndexIVFScalarQuantizer (
280
- coarse_quantizer, d, ncentroids, qt, metric);
319
+ FAISS_THROW_IF_NOT(!use_2layer);
320
+ IndexIVFScalarQuantizer* index_ivf =
321
+ new IndexIVFScalarQuantizer(
322
+ coarse_quantizer, d, ncentroids, qt, metric);
281
323
  index_ivf->quantizer_trains_alone =
282
- get_trains_alone (coarse_quantizer);
283
- del_coarse_quantizer.release ();
324
+ get_trains_alone(coarse_quantizer);
325
+ del_coarse_quantizer.release();
284
326
  index_ivf->own_fields = true;
285
327
  index_1 = index_ivf;
286
328
  } else if (hnsw_M > 0) {
287
329
  index_1 = new IndexHNSWSQ(d, qt, hnsw_M, metric);
288
330
  } else {
289
- index_1 = new IndexScalarQuantizer (d, qt, metric);
331
+ index_1 = new IndexScalarQuantizer(d, qt, metric);
290
332
  }
291
- } else if (!index && sscanf (tok, "PQ%d+%d", &M, &M2) == 2) {
292
- FAISS_THROW_IF_NOT_MSG(coarse_quantizer,
293
- "PQ with + works only with an IVF");
294
- FAISS_THROW_IF_NOT_MSG(metric == METRIC_L2,
295
- "IVFPQR not implemented for inner product search");
296
- IndexIVFPQR *index_ivf = new IndexIVFPQR (
297
- coarse_quantizer, d, ncentroids, M, 8, M2, 8);
333
+ } else if (!index && sscanf(tok, "PQ%d+%d", &M, &M2) == 2) {
334
+ FAISS_THROW_IF_NOT_MSG(
335
+ coarse_quantizer, "PQ with + works only with an IVF");
336
+ FAISS_THROW_IF_NOT_MSG(
337
+ metric == METRIC_L2,
338
+ "IVFPQR not implemented for inner product search");
339
+ IndexIVFPQR* index_ivf = new IndexIVFPQR(
340
+ coarse_quantizer, d, ncentroids, M, 8, M2, 8);
298
341
  index_ivf->quantizer_trains_alone =
299
- get_trains_alone (coarse_quantizer);
300
- del_coarse_quantizer.release ();
342
+ get_trains_alone(coarse_quantizer);
343
+ del_coarse_quantizer.release();
301
344
  index_ivf->own_fields = true;
302
345
  index_1 = index_ivf;
303
- } else if (!index && (
304
- sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
305
- (sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
306
- (sscanf (tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
346
+ } else if (
347
+ !index &&
348
+ (sscanf(tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
349
+ (sscanf(tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
350
+ (sscanf(tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
307
351
  if (bbs == -1) {
308
352
  bbs = 32;
309
353
  }
310
354
  bool by_residual = str_ends_with(stok, "fsr");
311
355
  if (coarse_quantizer) {
312
- IndexIVFPQFastScan *index_ivf = new IndexIVFPQFastScan(
313
- coarse_quantizer, d, ncentroids, M, 4, metric, bbs
314
- );
356
+ IndexIVFPQFastScan* index_ivf = new IndexIVFPQFastScan(
357
+ coarse_quantizer, d, ncentroids, M, 4, metric, bbs);
315
358
  index_ivf->quantizer_trains_alone =
316
- get_trains_alone (coarse_quantizer);
359
+ get_trains_alone(coarse_quantizer);
317
360
  index_ivf->metric_type = metric;
318
361
  index_ivf->by_residual = by_residual;
319
362
  index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
320
- del_coarse_quantizer.release ();
363
+ del_coarse_quantizer.release();
321
364
  index_ivf->own_fields = true;
322
365
  index_1 = index_ivf;
323
366
  } else {
324
- IndexPQFastScan *index_pq = new IndexPQFastScan (
325
- d, M, 4, metric, bbs
326
- );
367
+ IndexPQFastScan* index_pq =
368
+ new IndexPQFastScan(d, M, 4, metric, bbs);
327
369
  index_1 = index_pq;
328
370
  }
329
- } else if (!index && (sscanf (tok, "PQ%dx%d", &M, &nbit) == 2 ||
330
- sscanf (tok, "PQ%d", &M) == 1 ||
331
- sscanf (tok, "PQ%dnp", &M) == 1)) {
371
+ } else if (
372
+ !index &&
373
+ (sscanf(tok, "PQ%dx%d", &M, &nbit) == 2 ||
374
+ sscanf(tok, "PQ%d", &M) == 1 ||
375
+ sscanf(tok, "PQ%dnp", &M) == 1)) {
332
376
  bool do_polysemous_training = stok.find("np") == std::string::npos;
333
377
  if (coarse_quantizer) {
334
378
  if (!use_2layer) {
335
- IndexIVFPQ *index_ivf = new IndexIVFPQ (
336
- coarse_quantizer, d, ncentroids, M, nbit);
379
+ IndexIVFPQ* index_ivf = new IndexIVFPQ(
380
+ coarse_quantizer, d, ncentroids, M, nbit);
337
381
  index_ivf->quantizer_trains_alone =
338
- get_trains_alone (coarse_quantizer);
382
+ get_trains_alone(coarse_quantizer);
339
383
  index_ivf->metric_type = metric;
340
384
  index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
341
- del_coarse_quantizer.release ();
385
+ del_coarse_quantizer.release();
342
386
  index_ivf->own_fields = true;
343
387
  index_ivf->do_polysemous_training = do_polysemous_training;
344
388
  index_1 = index_ivf;
345
389
  } else {
346
- Index2Layer *index_2l = new Index2Layer
347
- (coarse_quantizer, ncentroids, M, nbit);
390
+ Index2Layer* index_2l = new Index2Layer(
391
+ coarse_quantizer, ncentroids, M, nbit);
348
392
  index_2l->q1.quantizer_trains_alone =
349
- get_trains_alone (coarse_quantizer);
393
+ get_trains_alone(coarse_quantizer);
350
394
  index_2l->q1.own_fields = true;
351
395
  index_1 = index_2l;
352
396
  }
353
397
  } else if (hnsw_M > 0) {
354
- IndexHNSWPQ *ipq = new IndexHNSWPQ(d, M, hnsw_M);
398
+ IndexHNSWPQ* ipq = new IndexHNSWPQ(d, M, hnsw_M);
355
399
  dynamic_cast<IndexPQ*>(ipq->storage)->do_polysemous_training =
356
- do_polysemous_training;
400
+ do_polysemous_training;
357
401
  index_1 = ipq;
358
402
  } else {
359
- IndexPQ *index_pq = new IndexPQ (d, M, nbit, metric);
403
+ IndexPQ* index_pq = new IndexPQ(d, M, nbit, metric);
360
404
  index_pq->do_polysemous_training = do_polysemous_training;
361
405
  index_1 = index_pq;
362
406
  }
363
- } else if (!index &&
364
- sscanf (tok, "HNSW%d_%d+PQ%d", &M, &ncent, &pq_m) == 3) {
365
- Index * quant = new IndexFlatL2 (d);
366
- IndexHNSW2Level * hidx2l = new IndexHNSW2Level (quant, ncent, pq_m, M);
367
- Index2Layer * idx2l = dynamic_cast<Index2Layer*>(hidx2l->storage);
407
+ } else if (
408
+ !index &&
409
+ sscanf(tok, "HNSW%d_%d+PQ%d", &M, &ncent, &pq_m) == 3) {
410
+ Index* quant = new IndexFlatL2(d);
411
+ IndexHNSW2Level* hidx2l =
412
+ new IndexHNSW2Level(quant, ncent, pq_m, M);
413
+ Index2Layer* idx2l = dynamic_cast<Index2Layer*>(hidx2l->storage);
368
414
  idx2l->q1.own_fields = true;
369
415
  index_1 = hidx2l;
370
- } else if (!index &&
371
- sscanf (tok, "HNSW%d_2x%d+PQ%d", &M, &nbit, &pq_m) == 3) {
372
- Index * quant = new MultiIndexQuantizer (d, 2, nbit);
373
- IndexHNSW2Level * hidx2l =
374
- new IndexHNSW2Level (quant, 1 << (2 * nbit), pq_m, M);
375
- Index2Layer * idx2l = dynamic_cast<Index2Layer*>(hidx2l->storage);
416
+ } else if (
417
+ !index &&
418
+ sscanf(tok, "HNSW%d_2x%d+PQ%d", &M, &nbit, &pq_m) == 3) {
419
+ Index* quant = new MultiIndexQuantizer(d, 2, nbit);
420
+ IndexHNSW2Level* hidx2l =
421
+ new IndexHNSW2Level(quant, 1 << (2 * nbit), pq_m, M);
422
+ Index2Layer* idx2l = dynamic_cast<Index2Layer*>(hidx2l->storage);
376
423
  idx2l->q1.own_fields = true;
377
424
  idx2l->q1.quantizer_trains_alone = 1;
378
425
  index_1 = hidx2l;
379
- } else if (!index &&
380
- sscanf (tok, "HNSW%d_PQ%d", &M, &pq_m) == 2) {
381
- index_1 = new IndexHNSWPQ (d, pq_m, M);
382
- } else if (!index &&
383
- sscanf (tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 &&
384
- pq_m == 8) {
385
- index_1 = new IndexHNSWSQ (d, ScalarQuantizer::QT_8bit, M);
386
- } else if (!index &&
387
- sscanf (tok, "HNSW%d", &M) == 1) {
426
+ } else if (!index && sscanf(tok, "HNSW%d_PQ%d", &M, &pq_m) == 2) {
427
+ index_1 = new IndexHNSWPQ(d, pq_m, M);
428
+ } else if (
429
+ !index && sscanf(tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 &&
430
+ pq_m == 8) {
431
+ index_1 = new IndexHNSWSQ(d, ScalarQuantizer::QT_8bit, M);
432
+ } else if (!index && sscanf(tok, "HNSW%d", &M) == 1) {
388
433
  hnsw_M = M;
389
434
  // here it is unclear what we want: HNSW flat or HNSWx,Y ?
390
- } else if (!index && (stok == "LSH" || stok == "LSHr" ||
391
- stok == "LSHrt" || stok == "LSHt")) {
435
+ } else if (!index && sscanf(tok, "NSG%d", &R) == 1) {
436
+ nsg_R = R;
437
+ } else if (
438
+ !index &&
439
+ (stok == "LSH" || stok == "LSHr" || stok == "LSHrt" ||
440
+ stok == "LSHt")) {
392
441
  bool rotate_data = strstr(tok, "r") != nullptr;
393
442
  bool train_thresholds = strstr(tok, "t") != nullptr;
394
- index_1 = new IndexLSH (d, d, rotate_data, train_thresholds);
395
- } else if (!index &&
396
- sscanf (tok, "ZnLattice%dx%d_%d", &M, &r2, &nbit) == 3) {
443
+ index_1 = new IndexLSH(d, d, rotate_data, train_thresholds);
444
+ } else if (
445
+ !index &&
446
+ sscanf(tok, "ZnLattice%dx%d_%d", &M, &r2, &nbit) == 3) {
397
447
  FAISS_THROW_IF_NOT(!coarse_quantizer);
398
448
  index_1 = new IndexLattice(d, M, nbit, r2);
399
449
  } else if (stok == "RFlat") {
@@ -401,67 +451,73 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
401
451
  } else if (stok == "Refine") {
402
452
  FAISS_THROW_IF_NOT_MSG(
403
453
  !parenthesis_refine.empty(),
404
- "Refine index should be provided in parentheses"
405
- );
454
+ "Refine index should be provided in parentheses");
406
455
  } else {
407
- FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n",
408
- tok, description_in);
456
+ FAISS_THROW_FMT(
457
+ "could not parse token \"%s\" in %s\n",
458
+ tok,
459
+ description_in);
409
460
  }
410
461
 
411
462
  if (index_1 && add_idmap) {
412
- IndexIDMap *idmap = new IndexIDMap(index_1);
413
- del_index.set (idmap);
463
+ IndexIDMap* idmap = new IndexIDMap(index_1);
464
+ del_index.set(idmap);
414
465
  idmap->own_fields = true;
415
466
  index_1 = idmap;
416
467
  add_idmap = false;
417
468
  }
418
469
 
419
- if (vt_1) {
420
- vts.chain.push_back (vt_1);
470
+ if (vt_1) {
471
+ vts.chain.push_back(vt_1);
421
472
  }
422
473
 
423
474
  if (coarse_quantizer_1) {
424
475
  coarse_quantizer = coarse_quantizer_1;
425
- del_coarse_quantizer.set (coarse_quantizer);
476
+ del_coarse_quantizer.set(coarse_quantizer);
426
477
  }
427
478
 
428
479
  if (index_1) {
429
480
  index = index_1;
430
- del_index.set (index);
481
+ del_index.set(index);
431
482
  }
432
483
  }
433
484
 
434
485
  if (!index && hnsw_M > 0) {
435
- index = new IndexHNSWFlat (d, hnsw_M, metric);
436
- del_index.set (index);
486
+ index = new IndexHNSWFlat(d, hnsw_M, metric);
487
+ del_index.set(index);
488
+ } else if (!index && nsg_R > 0) {
489
+ index = new IndexNSGFlat(d, nsg_R, metric);
490
+ del_index.set(index);
437
491
  }
438
492
 
439
- FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index",
440
- description_in);
493
+ FAISS_THROW_IF_NOT_FMT(
494
+ index, "description %s did not generate an index", description_in);
441
495
 
442
496
  // nothing can go wrong now
443
- del_index.release ();
444
- del_coarse_quantizer.release ();
497
+ del_index.release();
498
+ del_coarse_quantizer.release();
445
499
 
446
500
  if (add_idmap) {
447
- fprintf(stderr, "index_factory: WARNING: "
501
+ fprintf(stderr,
502
+ "index_factory: WARNING: "
448
503
  "IDMap option not used\n");
449
504
  }
450
505
 
451
506
  if (vts.chain.size() > 0) {
452
- IndexPreTransform *index_pt = new IndexPreTransform (index);
507
+ IndexPreTransform* index_pt = new IndexPreTransform(index);
453
508
  index_pt->own_fields = true;
454
509
  // add from back
455
510
  while (vts.chain.size() > 0) {
456
- index_pt->prepend_transform (vts.chain.back ());
457
- vts.chain.pop_back ();
511
+ index_pt->prepend_transform(vts.chain.back());
512
+ vts.chain.pop_back();
458
513
  }
459
514
  index = index_pt;
460
515
  }
461
516
 
462
517
  if (!parenthesis_refine.empty()) {
463
- Index *refine_index = index_factory(d_in, parenthesis_refine.c_str(), metric);
464
- IndexRefine *index_rf = new IndexRefine(index, refine_index);
518
+ Index* refine_index =
519
+ index_factory(d_in, parenthesis_refine.c_str(), metric);
520
+ IndexRefine* index_rf = new IndexRefine(index, refine_index);
465
521
  index_rf->own_refine_index = true;
466
522
  index_rf->own_fields = true;
467
523
  index = index_rf;
@@ -470,48 +526,43 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
470
526
  return index;
471
527
  }
472
528
 
473
- IndexBinary *index_binary_factory(int d, const char *description)
474
- {
475
- IndexBinary *index = nullptr;
529
+ IndexBinary* index_binary_factory(int d, const char* description) {
530
+ IndexBinary* index = nullptr;
476
531
 
477
532
  int ncentroids = -1;
478
533
  int M, nhash, b;
479
534
 
480
535
  if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) {
481
- IndexBinaryIVF *index_ivf = new IndexBinaryIVF(
482
- new IndexBinaryHNSW(d, M), d, ncentroids
483
- );
536
+ IndexBinaryIVF* index_ivf =
537
+ new IndexBinaryIVF(new IndexBinaryHNSW(d, M), d, ncentroids);
484
538
  index_ivf->own_fields = true;
485
539
  index = index_ivf;
486
540
 
487
541
  } else if (sscanf(description, "BIVF%d", &ncentroids) == 1) {
488
- IndexBinaryIVF *index_ivf = new IndexBinaryIVF(
489
- new IndexBinaryFlat(d), d, ncentroids
490
- );
542
+ IndexBinaryIVF* index_ivf =
543
+ new IndexBinaryIVF(new IndexBinaryFlat(d), d, ncentroids);
491
544
  index_ivf->own_fields = true;
492
545
  index = index_ivf;
493
546
 
494
547
  } else if (sscanf(description, "BHNSW%d", &M) == 1) {
495
- IndexBinaryHNSW *index_hnsw = new IndexBinaryHNSW(d, M);
548
+ IndexBinaryHNSW* index_hnsw = new IndexBinaryHNSW(d, M);
496
549
  index = index_hnsw;
497
550
 
498
551
  } else if (sscanf(description, "BHash%dx%d", &nhash, &b) == 2) {
499
- index = new IndexBinaryMultiHash (d, nhash, b);
552
+ index = new IndexBinaryMultiHash(d, nhash, b);
500
553
 
501
554
  } else if (sscanf(description, "BHash%d", &b) == 1) {
502
- index = new IndexBinaryHash (d, b);
555
+ index = new IndexBinaryHash(d, b);
503
556
 
504
557
  } else if (std::string(description) == "BFlat") {
505
558
  index = new IndexBinaryFlat(d);
506
559
 
507
560
  } else {
508
- FAISS_THROW_IF_NOT_FMT(index, "description %s did not generate an index",
509
- description);
561
+ FAISS_THROW_IF_NOT_FMT(
562
+ index, "description %s did not generate an index", description);
510
563
  }
511
564
 
512
565
  return index;
513
566
  }
514
567
 
515
-
516
-
517
568
  } // namespace faiss