faiss 0.1.7 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/README.md +7 -7
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +8 -2
  6. data/ext/faiss/index.cpp +102 -69
  7. data/ext/faiss/index_binary.cpp +24 -30
  8. data/ext/faiss/kmeans.cpp +20 -16
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +13 -14
  11. data/ext/faiss/product_quantizer.cpp +23 -24
  12. data/ext/faiss/utils.cpp +10 -37
  13. data/ext/faiss/utils.h +2 -13
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +0 -5
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +26 -12
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -9,11 +9,13 @@
9
9
 
10
10
  #include <faiss/index_io.h>
11
11
 
12
+ #include <faiss/impl/io_macros.h>
13
+
12
14
  #include <cstdio>
13
15
  #include <cstdlib>
14
16
 
15
- #include <sys/types.h>
16
17
  #include <sys/stat.h>
18
+ #include <sys/types.h>
17
19
 
18
20
  #include <faiss/impl/FaissAssert.h>
19
21
  #include <faiss/impl/io.h>
@@ -22,343 +24,383 @@
22
24
 
23
25
  #include <faiss/invlists/InvertedListsIOHook.h>
24
26
 
27
+ #include <faiss/Index2Layer.h>
25
28
  #include <faiss/IndexFlat.h>
26
- #include <faiss/VectorTransform.h>
27
- #include <faiss/IndexPreTransform.h>
28
- #include <faiss/IndexLSH.h>
29
- #include <faiss/IndexPQ.h>
29
+ #include <faiss/IndexHNSW.h>
30
30
  #include <faiss/IndexIVF.h>
31
+ #include <faiss/IndexIVFFlat.h>
31
32
  #include <faiss/IndexIVFPQ.h>
33
+ #include <faiss/IndexIVFPQFastScan.h>
32
34
  #include <faiss/IndexIVFPQR.h>
33
- #include <faiss/Index2Layer.h>
34
- #include <faiss/IndexIVFFlat.h>
35
35
  #include <faiss/IndexIVFSpectralHash.h>
36
- #include <faiss/MetaIndexes.h>
37
- #include <faiss/IndexScalarQuantizer.h>
38
- #include <faiss/IndexHNSW.h>
36
+ #include <faiss/IndexLSH.h>
39
37
  #include <faiss/IndexLattice.h>
38
+ #include <faiss/IndexNSG.h>
39
+ #include <faiss/IndexPQ.h>
40
40
  #include <faiss/IndexPQFastScan.h>
41
- #include <faiss/IndexIVFPQFastScan.h>
41
+ #include <faiss/IndexPreTransform.h>
42
42
  #include <faiss/IndexRefine.h>
43
+ #include <faiss/IndexResidual.h>
44
+ #include <faiss/IndexScalarQuantizer.h>
45
+ #include <faiss/MetaIndexes.h>
46
+ #include <faiss/VectorTransform.h>
43
47
 
44
48
  #include <faiss/IndexBinaryFlat.h>
45
49
  #include <faiss/IndexBinaryFromFloat.h>
46
50
  #include <faiss/IndexBinaryHNSW.h>
47
- #include <faiss/IndexBinaryIVF.h>
48
51
  #include <faiss/IndexBinaryHash.h>
52
+ #include <faiss/IndexBinaryIVF.h>
49
53
 
50
54
  namespace faiss {
51
55
 
52
-
53
56
  /*************************************************************
54
57
  * Read
55
58
  **************************************************************/
56
59
 
57
- static void read_index_header (Index *idx, IOReader *f) {
58
- READ1 (idx->d);
59
- READ1 (idx->ntotal);
60
+ static void read_index_header(Index* idx, IOReader* f) {
61
+ READ1(idx->d);
62
+ READ1(idx->ntotal);
60
63
  Index::idx_t dummy;
61
- READ1 (dummy);
62
- READ1 (dummy);
63
- READ1 (idx->is_trained);
64
- READ1 (idx->metric_type);
64
+ READ1(dummy);
65
+ READ1(dummy);
66
+ READ1(idx->is_trained);
67
+ READ1(idx->metric_type);
65
68
  if (idx->metric_type > 1) {
66
- READ1 (idx->metric_arg);
69
+ READ1(idx->metric_arg);
67
70
  }
68
71
  idx->verbose = false;
69
72
  }
70
73
 
71
- VectorTransform* read_VectorTransform (IOReader *f) {
74
+ VectorTransform* read_VectorTransform(IOReader* f) {
72
75
  uint32_t h;
73
- READ1 (h);
74
- VectorTransform *vt = nullptr;
75
-
76
- if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
77
- h == fourcc ("LTra") || h == fourcc ("PcAm") ||
78
- h == fourcc ("Viqm")) {
79
- LinearTransform *lt = nullptr;
80
- if (h == fourcc ("rrot")) {
81
- lt = new RandomRotationMatrix ();
82
- } else if (h == fourcc ("PCAm") ||
83
- h == fourcc ("PcAm")) {
84
- PCAMatrix * pca = new PCAMatrix ();
85
- READ1 (pca->eigen_power);
86
- READ1 (pca->random_rotation);
87
- if (h == fourcc ("PcAm"))
88
- READ1 (pca->balanced_bins);
89
- READVECTOR (pca->mean);
90
- READVECTOR (pca->eigenvalues);
91
- READVECTOR (pca->PCAMat);
76
+ READ1(h);
77
+ VectorTransform* vt = nullptr;
78
+
79
+ if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
80
+ h == fourcc("PcAm") || h == fourcc("Viqm")) {
81
+ LinearTransform* lt = nullptr;
82
+ if (h == fourcc("rrot")) {
83
+ lt = new RandomRotationMatrix();
84
+ } else if (h == fourcc("PCAm") || h == fourcc("PcAm")) {
85
+ PCAMatrix* pca = new PCAMatrix();
86
+ READ1(pca->eigen_power);
87
+ READ1(pca->random_rotation);
88
+ if (h == fourcc("PcAm"))
89
+ READ1(pca->balanced_bins);
90
+ READVECTOR(pca->mean);
91
+ READVECTOR(pca->eigenvalues);
92
+ READVECTOR(pca->PCAMat);
92
93
  lt = pca;
93
- } else if (h == fourcc ("Viqm")) {
94
- ITQMatrix *itqm = new ITQMatrix ();
95
- READ1 (itqm->max_iter);
96
- READ1 (itqm->seed);
94
+ } else if (h == fourcc("Viqm")) {
95
+ ITQMatrix* itqm = new ITQMatrix();
96
+ READ1(itqm->max_iter);
97
+ READ1(itqm->seed);
97
98
  lt = itqm;
98
- } else if (h == fourcc ("LTra")) {
99
- lt = new LinearTransform ();
99
+ } else if (h == fourcc("LTra")) {
100
+ lt = new LinearTransform();
100
101
  }
101
- READ1 (lt->have_bias);
102
- READVECTOR (lt->A);
103
- READVECTOR (lt->b);
104
- FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
105
- FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
102
+ READ1(lt->have_bias);
103
+ READVECTOR(lt->A);
104
+ READVECTOR(lt->b);
105
+ FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out);
106
+ FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out);
106
107
  lt->set_is_orthonormal();
107
108
  vt = lt;
108
- } else if (h == fourcc ("RmDT")) {
109
- RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
110
- READVECTOR (rdt->map);
109
+ } else if (h == fourcc("RmDT")) {
110
+ RemapDimensionsTransform* rdt = new RemapDimensionsTransform();
111
+ READVECTOR(rdt->map);
111
112
  vt = rdt;
112
- } else if (h == fourcc ("VNrm")) {
113
- NormalizationTransform *nt = new NormalizationTransform ();
114
- READ1 (nt->norm);
113
+ } else if (h == fourcc("VNrm")) {
114
+ NormalizationTransform* nt = new NormalizationTransform();
115
+ READ1(nt->norm);
115
116
  vt = nt;
116
- } else if (h == fourcc ("VCnt")) {
117
- CenteringTransform *ct = new CenteringTransform ();
118
- READVECTOR (ct->mean);
117
+ } else if (h == fourcc("VCnt")) {
118
+ CenteringTransform* ct = new CenteringTransform();
119
+ READVECTOR(ct->mean);
119
120
  vt = ct;
120
- } else if (h == fourcc ("Viqt")) {
121
- ITQTransform *itqt = new ITQTransform ();
121
+ } else if (h == fourcc("Viqt")) {
122
+ ITQTransform* itqt = new ITQTransform();
122
123
 
123
- READVECTOR (itqt->mean);
124
- READ1 (itqt->do_pca);
124
+ READVECTOR(itqt->mean);
125
+ READ1(itqt->do_pca);
125
126
  {
126
- ITQMatrix *itqm = dynamic_cast<ITQMatrix*>
127
- (read_VectorTransform (f));
127
+ ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f));
128
128
  FAISS_THROW_IF_NOT(itqm);
129
129
  itqt->itq = *itqm;
130
130
  delete itqm;
131
131
  }
132
132
  {
133
- LinearTransform *pi = dynamic_cast<LinearTransform*>
134
- (read_VectorTransform (f));
135
- FAISS_THROW_IF_NOT (pi);
133
+ LinearTransform* pi =
134
+ dynamic_cast<LinearTransform*>(read_VectorTransform(f));
135
+ FAISS_THROW_IF_NOT(pi);
136
136
  itqt->pca_then_itq = *pi;
137
137
  delete pi;
138
138
  }
139
139
  vt = itqt;
140
140
  } else {
141
141
  FAISS_THROW_FMT(
142
- "fourcc %ud (\"%s\") not recognized",
143
- h, fourcc_inv_printable(h).c_str()
144
- );
142
+ "fourcc %ud (\"%s\") not recognized",
143
+ h,
144
+ fourcc_inv_printable(h).c_str());
145
145
  }
146
- READ1 (vt->d_in);
147
- READ1 (vt->d_out);
148
- READ1 (vt->is_trained);
146
+ READ1(vt->d_in);
147
+ READ1(vt->d_out);
148
+ READ1(vt->is_trained);
149
149
  return vt;
150
150
  }
151
151
 
152
-
153
- static void read_ArrayInvertedLists_sizes (
154
- IOReader *f, std::vector<size_t> & sizes)
155
- {
152
+ static void read_ArrayInvertedLists_sizes(
153
+ IOReader* f,
154
+ std::vector<size_t>& sizes) {
156
155
  uint32_t list_type;
157
156
  READ1(list_type);
158
157
  if (list_type == fourcc("full")) {
159
158
  size_t os = sizes.size();
160
- READVECTOR (sizes);
161
- FAISS_THROW_IF_NOT (os == sizes.size());
159
+ READVECTOR(sizes);
160
+ FAISS_THROW_IF_NOT(os == sizes.size());
162
161
  } else if (list_type == fourcc("sprs")) {
163
162
  std::vector<size_t> idsizes;
164
- READVECTOR (idsizes);
163
+ READVECTOR(idsizes);
165
164
  for (size_t j = 0; j < idsizes.size(); j += 2) {
166
- FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
165
+ FAISS_THROW_IF_NOT(idsizes[j] < sizes.size());
167
166
  sizes[idsizes[j]] = idsizes[j + 1];
168
167
  }
169
168
  } else {
170
169
  FAISS_THROW_FMT(
171
- "list_type %ud (\"%s\") not recognized",
172
- list_type, fourcc_inv_printable(list_type).c_str()
173
- );
170
+ "list_type %ud (\"%s\") not recognized",
171
+ list_type,
172
+ fourcc_inv_printable(list_type).c_str());
174
173
  }
175
174
  }
176
175
 
177
- InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
176
+ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
178
177
  uint32_t h;
179
- READ1 (h);
180
- if (h == fourcc ("il00")) {
181
- fprintf(stderr, "read_InvertedLists:"
178
+ READ1(h);
179
+ if (h == fourcc("il00")) {
180
+ fprintf(stderr,
181
+ "read_InvertedLists:"
182
182
  " WARN! inverted lists not stored with IVF object\n");
183
183
  return nullptr;
184
- } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
185
- auto ails = new ArrayInvertedLists (0, 0);
186
- READ1 (ails->nlist);
187
- READ1 (ails->code_size);
188
- ails->ids.resize (ails->nlist);
189
- ails->codes.resize (ails->nlist);
190
- std::vector<size_t> sizes (ails->nlist);
191
- read_ArrayInvertedLists_sizes (f, sizes);
184
+ } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
185
+ auto ails = new ArrayInvertedLists(0, 0);
186
+ READ1(ails->nlist);
187
+ READ1(ails->code_size);
188
+ ails->ids.resize(ails->nlist);
189
+ ails->codes.resize(ails->nlist);
190
+ std::vector<size_t> sizes(ails->nlist);
191
+ read_ArrayInvertedLists_sizes(f, sizes);
192
192
  for (size_t i = 0; i < ails->nlist; i++) {
193
- ails->ids[i].resize (sizes[i]);
194
- ails->codes[i].resize (sizes[i] * ails->code_size);
193
+ ails->ids[i].resize(sizes[i]);
194
+ ails->codes[i].resize(sizes[i] * ails->code_size);
195
195
  }
196
196
  for (size_t i = 0; i < ails->nlist; i++) {
197
197
  size_t n = ails->ids[i].size();
198
198
  if (n > 0) {
199
- READANDCHECK (ails->codes[i].data(), n * ails->code_size);
200
- READANDCHECK (ails->ids[i].data(), n);
199
+ READANDCHECK(ails->codes[i].data(), n * ails->code_size);
200
+ READANDCHECK(ails->ids[i].data(), n);
201
201
  }
202
202
  }
203
203
  return ails;
204
204
 
205
- } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
206
- // code is always ilxx where xx is specific to the type of invlists we want
207
- // so we get the 16 high bits from the io_flag and the 16 low bits as "il"
205
+ } else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
206
+ // code is always ilxx where xx is specific to the type of invlists we
207
+ // want so we get the 16 high bits from the io_flag and the 16 low bits
208
+ // as "il"
208
209
  int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
209
210
  size_t nlist, code_size;
210
- READ1 (nlist);
211
- READ1 (code_size);
212
- std::vector<size_t> sizes (nlist);
213
- read_ArrayInvertedLists_sizes (f, sizes);
211
+ READ1(nlist);
212
+ READ1(code_size);
213
+ std::vector<size_t> sizes(nlist);
214
+ read_ArrayInvertedLists_sizes(f, sizes);
214
215
  return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
215
216
  f, io_flags, nlist, code_size, sizes);
216
217
  } else {
217
218
  return InvertedListsIOHook::lookup(h)->read(f, io_flags);
218
219
  }
219
-
220
220
  }
221
221
 
222
-
223
- static void read_InvertedLists (
224
- IndexIVF *ivf, IOReader *f, int io_flags) {
225
- InvertedLists *ils = read_InvertedLists (f, io_flags);
222
+ static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
223
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
226
224
  if (ils) {
227
- FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
228
- FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
229
- ils->code_size == ivf->code_size);
225
+ FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
226
+ FAISS_THROW_IF_NOT(
227
+ ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
228
+ ils->code_size == ivf->code_size);
230
229
  }
231
230
  ivf->invlists = ils;
232
231
  ivf->own_invlists = true;
233
232
  }
234
233
 
235
- static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
236
- READ1 (pq->d);
237
- READ1 (pq->M);
238
- READ1 (pq->nbits);
239
- pq->set_derived_values ();
240
- READVECTOR (pq->centroids);
234
+ static void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
235
+ READ1(pq->d);
236
+ READ1(pq->M);
237
+ READ1(pq->nbits);
238
+ pq->set_derived_values();
239
+ READVECTOR(pq->centroids);
240
+ }
241
+
242
+ static void read_ResidualQuantizer(ResidualQuantizer* rq, IOReader* f) {
243
+ READ1(rq->d);
244
+ READ1(rq->M);
245
+ READVECTOR(rq->nbits);
246
+ rq->set_derived_values();
247
+ READ1(rq->is_trained);
248
+ READ1(rq->train_type);
249
+ READ1(rq->max_beam_size);
250
+ READVECTOR(rq->codebooks);
241
251
  }
242
252
 
243
- static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
244
- READ1 (ivsc->qtype);
245
- READ1 (ivsc->rangestat);
246
- READ1 (ivsc->rangestat_arg);
247
- READ1 (ivsc->d);
248
- READ1 (ivsc->code_size);
249
- READVECTOR (ivsc->trained);
250
- ivsc->set_derived_sizes ();
253
+ static void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
254
+ READ1(ivsc->qtype);
255
+ READ1(ivsc->rangestat);
256
+ READ1(ivsc->rangestat_arg);
257
+ READ1(ivsc->d);
258
+ READ1(ivsc->code_size);
259
+ READVECTOR(ivsc->trained);
260
+ ivsc->set_derived_sizes();
251
261
  }
252
262
 
263
+ static void read_HNSW(HNSW* hnsw, IOReader* f) {
264
+ READVECTOR(hnsw->assign_probas);
265
+ READVECTOR(hnsw->cum_nneighbor_per_level);
266
+ READVECTOR(hnsw->levels);
267
+ READVECTOR(hnsw->offsets);
268
+ READVECTOR(hnsw->neighbors);
269
+
270
+ READ1(hnsw->entry_point);
271
+ READ1(hnsw->max_level);
272
+ READ1(hnsw->efConstruction);
273
+ READ1(hnsw->efSearch);
274
+ READ1(hnsw->upper_beam);
275
+ }
253
276
 
254
- static void read_HNSW (HNSW *hnsw, IOReader *f) {
255
- READVECTOR (hnsw->assign_probas);
256
- READVECTOR (hnsw->cum_nneighbor_per_level);
257
- READVECTOR (hnsw->levels);
258
- READVECTOR (hnsw->offsets);
259
- READVECTOR (hnsw->neighbors);
277
+ static void read_NSG(NSG* nsg, IOReader* f) {
278
+ READ1(nsg->ntotal);
279
+ READ1(nsg->R);
280
+ READ1(nsg->L);
281
+ READ1(nsg->C);
282
+ READ1(nsg->search_L);
283
+ READ1(nsg->enterpoint);
284
+ READ1(nsg->is_built);
285
+
286
+ if (!nsg->is_built) {
287
+ return;
288
+ }
260
289
 
261
- READ1 (hnsw->entry_point);
262
- READ1 (hnsw->max_level);
263
- READ1 (hnsw->efConstruction);
264
- READ1 (hnsw->efSearch);
265
- READ1 (hnsw->upper_beam);
290
+ constexpr int EMPTY_ID = -1;
291
+ int N = nsg->ntotal;
292
+ int R = nsg->R;
293
+ auto& graph = nsg->final_graph;
294
+ graph = std::make_shared<nsg::Graph<int>>(N, R);
295
+ std::fill_n(graph->data, N * R, EMPTY_ID);
296
+
297
+ int size = 0;
298
+
299
+ for (int i = 0; i < N; i++) {
300
+ for (int j = 0; j < R + 1; j++) {
301
+ int id;
302
+ READ1(id);
303
+ if (id != EMPTY_ID) {
304
+ graph->at(i, j) = id;
305
+ size += 1;
306
+ } else {
307
+ break;
308
+ }
309
+ }
310
+ }
266
311
  }
267
312
 
268
- ProductQuantizer * read_ProductQuantizer (const char*fname) {
313
+ ProductQuantizer* read_ProductQuantizer(const char* fname) {
269
314
  FileIOReader reader(fname);
270
315
  return read_ProductQuantizer(&reader);
271
316
  }
272
317
 
273
- ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
274
- ProductQuantizer *pq = new ProductQuantizer();
275
- ScopeDeleter1<ProductQuantizer> del (pq);
318
+ ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
319
+ ProductQuantizer* pq = new ProductQuantizer();
320
+ ScopeDeleter1<ProductQuantizer> del(pq);
276
321
 
277
- read_ProductQuantizer(pq, reader);
278
- del.release ();
279
- return pq;
322
+ read_ProductQuantizer(pq, reader);
323
+ del.release();
324
+ return pq;
280
325
  }
281
326
 
282
- static void read_direct_map (DirectMap *dm, IOReader *f) {
327
+ static void read_direct_map(DirectMap* dm, IOReader* f) {
283
328
  char maintain_direct_map;
284
- READ1 (maintain_direct_map);
329
+ READ1(maintain_direct_map);
285
330
  dm->type = (DirectMap::Type)maintain_direct_map;
286
- READVECTOR (dm->array);
331
+ READVECTOR(dm->array);
287
332
  if (dm->type == DirectMap::Hashtable) {
288
333
  using idx_t = Index::idx_t;
289
334
  std::vector<std::pair<idx_t, idx_t>> v;
290
- READVECTOR (v);
291
- std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
292
- map.reserve (v.size());
293
- for (auto it: v) {
294
- map [it.first] = it.second;
335
+ READVECTOR(v);
336
+ std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
337
+ map.reserve(v.size());
338
+ for (auto it : v) {
339
+ map[it.first] = it.second;
295
340
  }
296
341
  }
297
-
298
342
  }
299
343
 
300
-
301
- static void read_ivf_header (
302
- IndexIVF *ivf, IOReader *f,
303
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
304
- {
305
- read_index_header (ivf, f);
306
- READ1 (ivf->nlist);
307
- READ1 (ivf->nprobe);
308
- ivf->quantizer = read_index (f);
344
+ static void read_ivf_header(
345
+ IndexIVF* ivf,
346
+ IOReader* f,
347
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
348
+ read_index_header(ivf, f);
349
+ READ1(ivf->nlist);
350
+ READ1(ivf->nprobe);
351
+ ivf->quantizer = read_index(f);
309
352
  ivf->own_fields = true;
310
353
  if (ids) { // used in legacy "Iv" formats
311
- ids->resize (ivf->nlist);
354
+ ids->resize(ivf->nlist);
312
355
  for (size_t i = 0; i < ivf->nlist; i++)
313
- READVECTOR ((*ids)[i]);
356
+ READVECTOR((*ids)[i]);
314
357
  }
315
- read_direct_map (&ivf->direct_map, f);
358
+ read_direct_map(&ivf->direct_map, f);
316
359
  }
317
360
 
318
361
  // used for legacy formats
319
- static ArrayInvertedLists *set_array_invlist(
320
- IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
321
- {
322
- ArrayInvertedLists *ail = new ArrayInvertedLists (
323
- ivf->nlist, ivf->code_size);
324
- std::swap (ail->ids, ids);
362
+ static ArrayInvertedLists* set_array_invlist(
363
+ IndexIVF* ivf,
364
+ std::vector<std::vector<Index::idx_t>>& ids) {
365
+ ArrayInvertedLists* ail =
366
+ new ArrayInvertedLists(ivf->nlist, ivf->code_size);
367
+ std::swap(ail->ids, ids);
325
368
  ivf->invlists = ail;
326
369
  ivf->own_invlists = true;
327
370
  return ail;
328
371
  }
329
372
 
330
- static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
331
- {
332
- bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
373
+ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
374
+ bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ");
333
375
 
334
- IndexIVFPQR *ivfpqr =
335
- h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
336
- new IndexIVFPQR () : nullptr;
337
- IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
376
+ IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR")
377
+ ? new IndexIVFPQR()
378
+ : nullptr;
379
+ IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
338
380
 
339
- std::vector<std::vector<Index::idx_t> > ids;
340
- read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
341
- READ1 (ivpq->by_residual);
342
- READ1 (ivpq->code_size);
343
- read_ProductQuantizer (&ivpq->pq, f);
381
+ std::vector<std::vector<Index::idx_t>> ids;
382
+ read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
383
+ READ1(ivpq->by_residual);
384
+ READ1(ivpq->code_size);
385
+ read_ProductQuantizer(&ivpq->pq, f);
344
386
 
345
387
  if (legacy) {
346
- ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
388
+ ArrayInvertedLists* ail = set_array_invlist(ivpq, ids);
347
389
  for (size_t i = 0; i < ail->nlist; i++)
348
- READVECTOR (ail->codes[i]);
390
+ READVECTOR(ail->codes[i]);
349
391
  } else {
350
- read_InvertedLists (ivpq, f, io_flags);
392
+ read_InvertedLists(ivpq, f, io_flags);
351
393
  }
352
394
 
353
395
  if (ivpq->is_trained) {
354
396
  // precomputed table not stored. It is cheaper to recompute it
355
397
  ivpq->use_precomputed_table = 0;
356
398
  if (ivpq->by_residual)
357
- ivpq->precompute_table ();
399
+ ivpq->precompute_table();
358
400
  if (ivfpqr) {
359
- read_ProductQuantizer (&ivfpqr->refine_pq, f);
360
- READVECTOR (ivfpqr->refine_codes);
361
- READ1 (ivfpqr->k_factor);
401
+ read_ProductQuantizer(&ivfpqr->refine_pq, f);
402
+ READVECTOR(ivfpqr->refine_codes);
403
+ READ1(ivfpqr->k_factor);
362
404
  }
363
405
  }
364
406
  return ivpq;
@@ -366,200 +408,216 @@ static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
366
408
 
367
409
  int read_old_fmt_hack = 0;
368
410
 
369
- Index *read_index (IOReader *f, int io_flags) {
370
- Index * idx = nullptr;
411
+ Index* read_index(IOReader* f, int io_flags) {
412
+ Index* idx = nullptr;
371
413
  uint32_t h;
372
- READ1 (h);
373
- if (h == fourcc ("IxFI") || h == fourcc ("IxF2") || h == fourcc("IxFl")) {
374
- IndexFlat *idxf;
375
- if (h == fourcc ("IxFI")) {
376
- idxf = new IndexFlatIP ();
414
+ READ1(h);
415
+ if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
416
+ IndexFlat* idxf;
417
+ if (h == fourcc("IxFI")) {
418
+ idxf = new IndexFlatIP();
377
419
  } else if (h == fourcc("IxF2")) {
378
- idxf = new IndexFlatL2 ();
420
+ idxf = new IndexFlatL2();
379
421
  } else {
380
- idxf = new IndexFlat ();
422
+ idxf = new IndexFlat();
381
423
  }
382
- read_index_header (idxf, f);
383
- READVECTOR (idxf->xb);
384
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
424
+ read_index_header(idxf, f);
425
+ READVECTOR(idxf->xb);
426
+ FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->d);
385
427
  // leak!
386
428
  idx = idxf;
387
429
  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
388
- IndexLSH * idxl = new IndexLSH ();
389
- read_index_header (idxl, f);
390
- READ1 (idxl->nbits);
391
- READ1 (idxl->rotate_data);
392
- READ1 (idxl->train_thresholds);
393
- READVECTOR (idxl->thresholds);
394
- READ1 (idxl->bytes_per_vec);
430
+ IndexLSH* idxl = new IndexLSH();
431
+ read_index_header(idxl, f);
432
+ READ1(idxl->nbits);
433
+ READ1(idxl->rotate_data);
434
+ READ1(idxl->train_thresholds);
435
+ READVECTOR(idxl->thresholds);
436
+ READ1(idxl->bytes_per_vec);
395
437
  if (h == fourcc("IxHE")) {
396
- FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
397
- "can only read old format IndexLSH with "
398
- "nbits multiple of 64 (got %d)",
399
- (int) idxl->nbits);
438
+ FAISS_THROW_IF_NOT_FMT(
439
+ idxl->nbits % 64 == 0,
440
+ "can only read old format IndexLSH with "
441
+ "nbits multiple of 64 (got %d)",
442
+ (int)idxl->nbits);
400
443
  // leak
401
444
  idxl->bytes_per_vec *= 8;
402
445
  }
403
446
  {
404
- RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
405
- (read_VectorTransform (f));
447
+ RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>(
448
+ read_VectorTransform(f));
406
449
  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
407
450
  idxl->rrot = *rrot;
408
451
  delete rrot;
409
452
  }
410
- READVECTOR (idxl->codes);
411
- FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
412
- idxl->rrot.d_out == idxl->nbits);
413
- FAISS_THROW_IF_NOT (
414
- idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
453
+ READVECTOR(idxl->codes);
454
+ FAISS_THROW_IF_NOT(
455
+ idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
456
+ FAISS_THROW_IF_NOT(
457
+ idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
415
458
  idx = idxl;
416
- } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
417
- h == fourcc ("IxPq")) {
459
+ } else if (
460
+ h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) {
418
461
  // IxPQ and IxPo were merged into the same IndexPQ object
419
- IndexPQ * idxp =new IndexPQ ();
420
- read_index_header (idxp, f);
421
- read_ProductQuantizer (&idxp->pq, f);
422
- READVECTOR (idxp->codes);
423
- if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
424
- READ1 (idxp->search_type);
425
- READ1 (idxp->encode_signs);
426
- READ1 (idxp->polysemous_ht);
462
+ IndexPQ* idxp = new IndexPQ();
463
+ read_index_header(idxp, f);
464
+ read_ProductQuantizer(&idxp->pq, f);
465
+ READVECTOR(idxp->codes);
466
+ if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
467
+ READ1(idxp->search_type);
468
+ READ1(idxp->encode_signs);
469
+ READ1(idxp->polysemous_ht);
427
470
  }
428
471
  // Old versions of PQ all had metric_type set to INNER_PRODUCT
429
472
  // when they were in fact using L2. Therefore, we force metric type
430
473
  // to L2 when the old format is detected
431
- if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
474
+ if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
432
475
  idxp->metric_type = METRIC_L2;
433
476
  }
434
477
  idx = idxp;
435
- } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
436
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
437
- std::vector<std::vector<Index::idx_t> > ids;
438
- read_ivf_header (ivfl, f, &ids);
478
+ } else if (h == fourcc("IxRQ")) {
479
+ IndexResidual* idxr = new IndexResidual();
480
+ read_index_header(idxr, f);
481
+ read_ResidualQuantizer(&idxr->rq, f);
482
+ READ1(idxr->search_type);
483
+ READ1(idxr->norm_min);
484
+ READ1(idxr->norm_max);
485
+ READ1(idxr->code_size);
486
+ READVECTOR(idxr->codes);
487
+ idx = idxr;
488
+ } else if (h == fourcc("ImRQ")) {
489
+ ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
490
+ read_index_header(idxr, f);
491
+ read_ResidualQuantizer(&idxr->rq, f);
492
+ READ1(idxr->beam_factor);
493
+ idxr->set_beam_factor(idxr->beam_factor);
494
+ idx = idxr;
495
+ } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
496
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
497
+ std::vector<std::vector<Index::idx_t>> ids;
498
+ read_ivf_header(ivfl, f, &ids);
439
499
  ivfl->code_size = ivfl->d * sizeof(float);
440
- ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
500
+ ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
441
501
 
442
- if (h == fourcc ("IvFL")) {
502
+ if (h == fourcc("IvFL")) {
443
503
  for (size_t i = 0; i < ivfl->nlist; i++) {
444
- READVECTOR (ail->codes[i]);
504
+ READVECTOR(ail->codes[i]);
445
505
  }
446
506
  } else { // old format
447
507
  for (size_t i = 0; i < ivfl->nlist; i++) {
448
508
  std::vector<float> vec;
449
- READVECTOR (vec);
509
+ READVECTOR(vec);
450
510
  ail->codes[i].resize(vec.size() * sizeof(float));
451
- memcpy(ail->codes[i].data(), vec.data(),
452
- ail->codes[i].size());
511
+ memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size());
453
512
  }
454
513
  }
455
514
  idx = ivfl;
456
- } else if (h == fourcc ("IwFd")) {
457
- IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
458
- read_ivf_header (ivfl, f);
515
+ } else if (h == fourcc("IwFd")) {
516
+ IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup();
517
+ read_ivf_header(ivfl, f);
459
518
  ivfl->code_size = ivfl->d * sizeof(float);
460
519
  {
461
520
  std::vector<Index::idx_t> tab;
462
- READVECTOR (tab);
521
+ READVECTOR(tab);
463
522
  for (long i = 0; i < tab.size(); i += 2) {
464
- std::pair<Index::idx_t, Index::idx_t>
465
- pair (tab[i], tab[i + 1]);
466
- ivfl->instances.insert (pair);
523
+ std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
524
+ ivfl->instances.insert(pair);
467
525
  }
468
526
  }
469
- read_InvertedLists (ivfl, f, io_flags);
527
+ read_InvertedLists(ivfl, f, io_flags);
470
528
  idx = ivfl;
471
- } else if (h == fourcc ("IwFl")) {
472
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
473
- read_ivf_header (ivfl, f);
529
+ } else if (h == fourcc("IwFl")) {
530
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
531
+ read_ivf_header(ivfl, f);
474
532
  ivfl->code_size = ivfl->d * sizeof(float);
475
- read_InvertedLists (ivfl, f, io_flags);
533
+ read_InvertedLists(ivfl, f, io_flags);
476
534
  idx = ivfl;
477
- } else if (h == fourcc ("IxSQ")) {
478
- IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
479
- read_index_header (idxs, f);
480
- read_ScalarQuantizer (&idxs->sq, f);
481
- READVECTOR (idxs->codes);
535
+ } else if (h == fourcc("IxSQ")) {
536
+ IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
537
+ read_index_header(idxs, f);
538
+ read_ScalarQuantizer(&idxs->sq, f);
539
+ READVECTOR(idxs->codes);
482
540
  idxs->code_size = idxs->sq.code_size;
483
541
  idx = idxs;
484
- } else if (h == fourcc ("IxLa")) {
542
+ } else if (h == fourcc("IxLa")) {
485
543
  int d, nsq, scale_nbit, r2;
486
- READ1 (d);
487
- READ1 (nsq);
488
- READ1 (scale_nbit);
489
- READ1 (r2);
490
- IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2);
491
- read_index_header (idxl, f);
492
- READVECTOR (idxl->trained);
544
+ READ1(d);
545
+ READ1(nsq);
546
+ READ1(scale_nbit);
547
+ READ1(r2);
548
+ IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2);
549
+ read_index_header(idxl, f);
550
+ READVECTOR(idxl->trained);
493
551
  idx = idxl;
494
- } else if(h == fourcc ("IvSQ")) { // legacy
495
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
496
- std::vector<std::vector<Index::idx_t> > ids;
497
- read_ivf_header (ivsc, f, &ids);
498
- read_ScalarQuantizer (&ivsc->sq, f);
499
- READ1 (ivsc->code_size);
500
- ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
501
- for(int i = 0; i < ivsc->nlist; i++)
502
- READVECTOR (ail->codes[i]);
552
+ } else if (h == fourcc("IvSQ")) { // legacy
553
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
554
+ std::vector<std::vector<Index::idx_t>> ids;
555
+ read_ivf_header(ivsc, f, &ids);
556
+ read_ScalarQuantizer(&ivsc->sq, f);
557
+ READ1(ivsc->code_size);
558
+ ArrayInvertedLists* ail = set_array_invlist(ivsc, ids);
559
+ for (int i = 0; i < ivsc->nlist; i++)
560
+ READVECTOR(ail->codes[i]);
503
561
  idx = ivsc;
504
- } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
505
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
506
- read_ivf_header (ivsc, f);
507
- read_ScalarQuantizer (&ivsc->sq, f);
508
- READ1 (ivsc->code_size);
509
- if (h == fourcc ("IwSQ")) {
562
+ } else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) {
563
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
564
+ read_ivf_header(ivsc, f);
565
+ read_ScalarQuantizer(&ivsc->sq, f);
566
+ READ1(ivsc->code_size);
567
+ if (h == fourcc("IwSQ")) {
510
568
  ivsc->by_residual = true;
511
569
  } else {
512
- READ1 (ivsc->by_residual);
570
+ READ1(ivsc->by_residual);
513
571
  }
514
- read_InvertedLists (ivsc, f, io_flags);
572
+ read_InvertedLists(ivsc, f, io_flags);
515
573
  idx = ivsc;
516
- } else if(h == fourcc ("IwSh")) {
517
- IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
518
- read_ivf_header (ivsp, f);
519
- ivsp->vt = read_VectorTransform (f);
574
+ } else if (h == fourcc("IwSh")) {
575
+ IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash();
576
+ read_ivf_header(ivsp, f);
577
+ ivsp->vt = read_VectorTransform(f);
520
578
  ivsp->own_fields = true;
521
- READ1 (ivsp->nbit);
579
+ READ1(ivsp->nbit);
522
580
  // not stored by write_ivf_header
523
581
  ivsp->code_size = (ivsp->nbit + 7) / 8;
524
- READ1 (ivsp->period);
525
- READ1 (ivsp->threshold_type);
526
- READVECTOR (ivsp->trained);
527
- read_InvertedLists (ivsp, f, io_flags);
582
+ READ1(ivsp->period);
583
+ READ1(ivsp->threshold_type);
584
+ READVECTOR(ivsp->trained);
585
+ read_InvertedLists(ivsp, f, io_flags);
528
586
  idx = ivsp;
529
- } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
530
- h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {
531
-
532
- idx = read_ivfpq (f, h, io_flags);
587
+ } else if (
588
+ h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
589
+ h == fourcc("IwQR")) {
590
+ idx = read_ivfpq(f, h, io_flags);
533
591
 
534
- } else if(h == fourcc ("IxPT")) {
535
- IndexPreTransform * ixpt = new IndexPreTransform();
592
+ } else if (h == fourcc("IxPT")) {
593
+ IndexPreTransform* ixpt = new IndexPreTransform();
536
594
  ixpt->own_fields = true;
537
- read_index_header (ixpt, f);
595
+ read_index_header(ixpt, f);
538
596
  int nt;
539
597
  if (read_old_fmt_hack == 2) {
540
598
  nt = 1;
541
599
  } else {
542
- READ1 (nt);
600
+ READ1(nt);
543
601
  }
544
602
  for (int i = 0; i < nt; i++) {
545
- ixpt->chain.push_back (read_VectorTransform (f));
603
+ ixpt->chain.push_back(read_VectorTransform(f));
546
604
  }
547
- ixpt->index = read_index (f, io_flags);
605
+ ixpt->index = read_index(f, io_flags);
548
606
  idx = ixpt;
549
- } else if(h == fourcc ("Imiq")) {
550
- MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
551
- read_index_header (imiq, f);
552
- read_ProductQuantizer (&imiq->pq, f);
607
+ } else if (h == fourcc("Imiq")) {
608
+ MultiIndexQuantizer* imiq = new MultiIndexQuantizer();
609
+ read_index_header(imiq, f);
610
+ read_ProductQuantizer(&imiq->pq, f);
553
611
  idx = imiq;
554
- } else if(h == fourcc ("IxRF")) {
555
- IndexRefine *idxrf = new IndexRefine ();
556
- read_index_header (idxrf, f);
612
+ } else if (h == fourcc("IxRF")) {
613
+ IndexRefine* idxrf = new IndexRefine();
614
+ read_index_header(idxrf, f);
557
615
  idxrf->base_index = read_index(f, io_flags);
558
616
  idxrf->refine_index = read_index(f, io_flags);
559
- READ1 (idxrf->k_factor);
617
+ READ1(idxrf->k_factor);
560
618
  if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
561
619
  // then make a RefineFlat with it
562
- IndexRefine *idxrf_old = idxrf;
620
+ IndexRefine* idxrf_old = idxrf;
563
621
  idxrf = new IndexRefineFlat();
564
622
  *idxrf = *idxrf_old;
565
623
  delete idxrf_old;
@@ -567,248 +625,260 @@ Index *read_index (IOReader *f, int io_flags) {
567
625
  idxrf->own_fields = true;
568
626
  idxrf->own_refine_index = true;
569
627
  idx = idxrf;
570
- } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
571
- bool is_map2 = h == fourcc ("IxM2");
572
- IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
573
- read_index_header (idxmap, f);
574
- idxmap->index = read_index (f, io_flags);
628
+ } else if (h == fourcc("IxMp") || h == fourcc("IxM2")) {
629
+ bool is_map2 = h == fourcc("IxM2");
630
+ IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap();
631
+ read_index_header(idxmap, f);
632
+ idxmap->index = read_index(f, io_flags);
575
633
  idxmap->own_fields = true;
576
- READVECTOR (idxmap->id_map);
634
+ READVECTOR(idxmap->id_map);
577
635
  if (is_map2) {
578
- static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
636
+ static_cast<IndexIDMap2*>(idxmap)->construct_rev_map();
579
637
  }
580
638
  idx = idxmap;
581
- } else if (h == fourcc ("Ix2L")) {
582
- Index2Layer * idxp = new Index2Layer ();
583
- read_index_header (idxp, f);
584
- idxp->q1.quantizer = read_index (f, io_flags);
585
- READ1 (idxp->q1.nlist);
586
- READ1 (idxp->q1.quantizer_trains_alone);
587
- read_ProductQuantizer (&idxp->pq, f);
588
- READ1 (idxp->code_size_1);
589
- READ1 (idxp->code_size_2);
590
- READ1 (idxp->code_size);
591
- READVECTOR (idxp->codes);
639
+ } else if (h == fourcc("Ix2L")) {
640
+ Index2Layer* idxp = new Index2Layer();
641
+ read_index_header(idxp, f);
642
+ idxp->q1.quantizer = read_index(f, io_flags);
643
+ READ1(idxp->q1.nlist);
644
+ READ1(idxp->q1.quantizer_trains_alone);
645
+ read_ProductQuantizer(&idxp->pq, f);
646
+ READ1(idxp->code_size_1);
647
+ READ1(idxp->code_size_2);
648
+ READ1(idxp->code_size);
649
+ READVECTOR(idxp->codes);
592
650
  idx = idxp;
593
- } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
594
- h == fourcc("IHNs") || h == fourcc("IHN2")) {
595
- IndexHNSW *idxhnsw = nullptr;
596
- if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
597
- if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
598
- if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
599
- if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
600
- read_index_header (idxhnsw, f);
601
- read_HNSW (&idxhnsw->hnsw, f);
602
- idxhnsw->storage = read_index (f, io_flags);
651
+ } else if (
652
+ h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
653
+ h == fourcc("IHN2")) {
654
+ IndexHNSW* idxhnsw = nullptr;
655
+ if (h == fourcc("IHNf"))
656
+ idxhnsw = new IndexHNSWFlat();
657
+ if (h == fourcc("IHNp"))
658
+ idxhnsw = new IndexHNSWPQ();
659
+ if (h == fourcc("IHNs"))
660
+ idxhnsw = new IndexHNSWSQ();
661
+ if (h == fourcc("IHN2"))
662
+ idxhnsw = new IndexHNSW2Level();
663
+ read_index_header(idxhnsw, f);
664
+ read_HNSW(&idxhnsw->hnsw, f);
665
+ idxhnsw->storage = read_index(f, io_flags);
603
666
  idxhnsw->own_fields = true;
604
667
  if (h == fourcc("IHNp")) {
605
- dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
668
+ dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
606
669
  }
607
670
  idx = idxhnsw;
608
- } else if(h == fourcc("IPfs")) {
609
- IndexPQFastScan *idxpqfs = new IndexPQFastScan();
610
- read_index_header (idxpqfs, f);
611
- read_ProductQuantizer (&idxpqfs->pq, f);
612
- READ1 (idxpqfs->implem);
613
- READ1 (idxpqfs->bbs);
614
- READ1 (idxpqfs->qbs);
615
- READ1 (idxpqfs->ntotal2);
616
- READ1 (idxpqfs->M2);
617
- READVECTOR (idxpqfs->codes);
671
+ } else if (h == fourcc("INSf")) {
672
+ IndexNSG* idxnsg = new IndexNSGFlat();
673
+ read_index_header(idxnsg, f);
674
+ READ1(idxnsg->GK);
675
+ READ1(idxnsg->build_type);
676
+ READ1(idxnsg->nndescent_S);
677
+ READ1(idxnsg->nndescent_R);
678
+ READ1(idxnsg->nndescent_L);
679
+ READ1(idxnsg->nndescent_iter);
680
+ read_NSG(&idxnsg->nsg, f);
681
+ idxnsg->storage = read_index(f, io_flags);
682
+ idxnsg->own_fields = true;
683
+ idx = idxnsg;
684
+ } else if (h == fourcc("IPfs")) {
685
+ IndexPQFastScan* idxpqfs = new IndexPQFastScan();
686
+ read_index_header(idxpqfs, f);
687
+ read_ProductQuantizer(&idxpqfs->pq, f);
688
+ READ1(idxpqfs->implem);
689
+ READ1(idxpqfs->bbs);
690
+ READ1(idxpqfs->qbs);
691
+ READ1(idxpqfs->ntotal2);
692
+ READ1(idxpqfs->M2);
693
+ READVECTOR(idxpqfs->codes);
618
694
  idx = idxpqfs;
619
695
 
620
696
  } else if (h == fourcc("IwPf")) {
621
- IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
622
- read_ivf_header (ivpq, f);
623
- READ1 (ivpq->by_residual);
624
- READ1 (ivpq->code_size);
625
- READ1 (ivpq->bbs);
626
- READ1 (ivpq->M2);
627
- READ1 (ivpq->implem);
628
- READ1 (ivpq->qbs2);
629
- read_ProductQuantizer (&ivpq->pq, f);
630
- read_InvertedLists (ivpq, f, io_flags);
697
+ IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan();
698
+ read_ivf_header(ivpq, f);
699
+ READ1(ivpq->by_residual);
700
+ READ1(ivpq->code_size);
701
+ READ1(ivpq->bbs);
702
+ READ1(ivpq->M2);
703
+ READ1(ivpq->implem);
704
+ READ1(ivpq->qbs2);
705
+ read_ProductQuantizer(&ivpq->pq, f);
706
+ read_InvertedLists(ivpq, f, io_flags);
631
707
  ivpq->precompute_table();
632
708
  idx = ivpq;
633
709
  } else {
634
710
  FAISS_THROW_FMT(
635
- "Index type 0x%08x (\"%s\") not recognized",
636
- h, fourcc_inv_printable(h).c_str()
637
- );
711
+ "Index type 0x%08x (\"%s\") not recognized",
712
+ h,
713
+ fourcc_inv_printable(h).c_str());
638
714
  idx = nullptr;
639
715
  }
640
716
  return idx;
641
717
  }
642
718
 
643
-
644
- Index *read_index (FILE * f, int io_flags) {
719
+ Index* read_index(FILE* f, int io_flags) {
645
720
  FileIOReader reader(f);
646
721
  return read_index(&reader, io_flags);
647
722
  }
648
723
 
649
- Index *read_index (const char *fname, int io_flags) {
724
+ Index* read_index(const char* fname, int io_flags) {
650
725
  FileIOReader reader(fname);
651
- Index *idx = read_index (&reader, io_flags);
726
+ Index* idx = read_index(&reader, io_flags);
652
727
  return idx;
653
728
  }
654
729
 
655
- VectorTransform *read_VectorTransform (const char *fname) {
730
+ VectorTransform* read_VectorTransform(const char* fname) {
656
731
  FileIOReader reader(fname);
657
- VectorTransform *vt = read_VectorTransform (&reader);
732
+ VectorTransform* vt = read_VectorTransform(&reader);
658
733
  return vt;
659
734
  }
660
735
 
661
-
662
-
663
736
  /*************************************************************
664
737
  * Read binary indexes
665
738
  **************************************************************/
666
739
 
667
- static void read_InvertedLists (
668
- IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
669
- InvertedLists *ils = read_InvertedLists (f, io_flags);
670
- FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
671
- ils->code_size == ivf->code_size));
740
+ static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) {
741
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
742
+ FAISS_THROW_IF_NOT(
743
+ !ils ||
744
+ (ils->nlist == ivf->nlist && ils->code_size == ivf->code_size));
672
745
  ivf->invlists = ils;
673
746
  ivf->own_invlists = true;
674
747
  }
675
748
 
676
-
677
-
678
- static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
679
- READ1 (idx->d);
680
- READ1 (idx->code_size);
681
- READ1 (idx->ntotal);
682
- READ1 (idx->is_trained);
683
- READ1 (idx->metric_type);
749
+ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
750
+ READ1(idx->d);
751
+ READ1(idx->code_size);
752
+ READ1(idx->ntotal);
753
+ READ1(idx->is_trained);
754
+ READ1(idx->metric_type);
684
755
  idx->verbose = false;
685
756
  }
686
757
 
687
- static void read_binary_ivf_header (
688
- IndexBinaryIVF *ivf, IOReader *f,
689
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
690
- {
691
- read_index_binary_header (ivf, f);
692
- READ1 (ivf->nlist);
693
- READ1 (ivf->nprobe);
694
- ivf->quantizer = read_index_binary (f);
758
+ static void read_binary_ivf_header(
759
+ IndexBinaryIVF* ivf,
760
+ IOReader* f,
761
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
762
+ read_index_binary_header(ivf, f);
763
+ READ1(ivf->nlist);
764
+ READ1(ivf->nprobe);
765
+ ivf->quantizer = read_index_binary(f);
695
766
  ivf->own_fields = true;
696
767
  if (ids) { // used in legacy "Iv" formats
697
- ids->resize (ivf->nlist);
768
+ ids->resize(ivf->nlist);
698
769
  for (size_t i = 0; i < ivf->nlist; i++)
699
- READVECTOR ((*ids)[i]);
770
+ READVECTOR((*ids)[i]);
700
771
  }
701
- read_direct_map (&ivf->direct_map, f);
772
+ read_direct_map(&ivf->direct_map, f);
702
773
  }
703
774
 
704
- static void read_binary_hash_invlists (
705
- IndexBinaryHash::InvertedListMap &invlists,
706
- int b, IOReader *f)
707
- {
775
+ static void read_binary_hash_invlists(
776
+ IndexBinaryHash::InvertedListMap& invlists,
777
+ int b,
778
+ IOReader* f) {
708
779
  size_t sz;
709
- READ1 (sz);
780
+ READ1(sz);
710
781
  int il_nbit = 0;
711
- READ1 (il_nbit);
782
+ READ1(il_nbit);
712
783
  // buffer for bitstrings
713
784
  std::vector<uint8_t> buf((b + il_nbit) * sz);
714
- READVECTOR (buf);
715
- BitstringReader rd (buf.data(), buf.size());
716
- invlists.reserve (sz);
785
+ READVECTOR(buf);
786
+ BitstringReader rd(buf.data(), buf.size());
787
+ invlists.reserve(sz);
717
788
  for (size_t i = 0; i < sz; i++) {
718
789
  uint64_t hash = rd.read(b);
719
790
  uint64_t ilsz = rd.read(il_nbit);
720
- auto & il = invlists[hash];
721
- READVECTOR (il.ids);
722
- FAISS_THROW_IF_NOT (il.ids.size() == ilsz);
723
- READVECTOR (il.vecs);
791
+ auto& il = invlists[hash];
792
+ READVECTOR(il.ids);
793
+ FAISS_THROW_IF_NOT(il.ids.size() == ilsz);
794
+ READVECTOR(il.vecs);
724
795
  }
725
796
  }
726
797
 
727
798
  static void read_binary_multi_hash_map(
728
- IndexBinaryMultiHash::Map &map,
729
- int b, size_t ntotal,
730
- IOReader *f)
731
- {
799
+ IndexBinaryMultiHash::Map& map,
800
+ int b,
801
+ size_t ntotal,
802
+ IOReader* f) {
732
803
  int id_bits;
733
804
  size_t sz;
734
- READ1 (id_bits);
735
- READ1 (sz);
805
+ READ1(id_bits);
806
+ READ1(sz);
736
807
  std::vector<uint8_t> buf;
737
- READVECTOR (buf);
808
+ READVECTOR(buf);
738
809
  size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
739
- FAISS_THROW_IF_NOT (buf.size() == (nbit + 7) / 8);
740
- BitstringReader rd (buf.data(), buf.size());
741
- map.reserve (sz);
810
+ FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8);
811
+ BitstringReader rd(buf.data(), buf.size());
812
+ map.reserve(sz);
742
813
  for (size_t i = 0; i < sz; i++) {
743
814
  uint64_t hash = rd.read(b);
744
815
  uint64_t ilsz = rd.read(id_bits);
745
- auto & il = map[hash];
816
+ auto& il = map[hash];
746
817
  for (size_t j = 0; j < ilsz; j++) {
747
- il.push_back (rd.read (id_bits));
818
+ il.push_back(rd.read(id_bits));
748
819
  }
749
820
  }
750
821
  }
751
822
 
752
-
753
-
754
- IndexBinary *read_index_binary (IOReader *f, int io_flags) {
755
- IndexBinary * idx = nullptr;
823
+ IndexBinary* read_index_binary(IOReader* f, int io_flags) {
824
+ IndexBinary* idx = nullptr;
756
825
  uint32_t h;
757
- READ1 (h);
758
- if (h == fourcc ("IBxF")) {
759
- IndexBinaryFlat *idxf = new IndexBinaryFlat ();
760
- read_index_binary_header (idxf, f);
761
- READVECTOR (idxf->xb);
762
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
826
+ READ1(h);
827
+ if (h == fourcc("IBxF")) {
828
+ IndexBinaryFlat* idxf = new IndexBinaryFlat();
829
+ read_index_binary_header(idxf, f);
830
+ READVECTOR(idxf->xb);
831
+ FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size);
763
832
  // leak!
764
833
  idx = idxf;
765
- } else if (h == fourcc ("IBwF")) {
766
- IndexBinaryIVF *ivf = new IndexBinaryIVF ();
767
- read_binary_ivf_header (ivf, f);
768
- read_InvertedLists (ivf, f, io_flags);
834
+ } else if (h == fourcc("IBwF")) {
835
+ IndexBinaryIVF* ivf = new IndexBinaryIVF();
836
+ read_binary_ivf_header(ivf, f);
837
+ read_InvertedLists(ivf, f, io_flags);
769
838
  idx = ivf;
770
- } else if (h == fourcc ("IBFf")) {
771
- IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat ();
772
- read_index_binary_header (idxff, f);
839
+ } else if (h == fourcc("IBFf")) {
840
+ IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat();
841
+ read_index_binary_header(idxff, f);
773
842
  idxff->own_fields = true;
774
- idxff->index = read_index (f, io_flags);
843
+ idxff->index = read_index(f, io_flags);
775
844
  idx = idxff;
776
- } else if (h == fourcc ("IBHf")) {
777
- IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW ();
778
- read_index_binary_header (idxhnsw, f);
779
- read_HNSW (&idxhnsw->hnsw, f);
780
- idxhnsw->storage = read_index_binary (f, io_flags);
845
+ } else if (h == fourcc("IBHf")) {
846
+ IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW();
847
+ read_index_binary_header(idxhnsw, f);
848
+ read_HNSW(&idxhnsw->hnsw, f);
849
+ idxhnsw->storage = read_index_binary(f, io_flags);
781
850
  idxhnsw->own_fields = true;
782
851
  idx = idxhnsw;
783
- } else if(h == fourcc ("IBMp") || h == fourcc ("IBM2")) {
784
- bool is_map2 = h == fourcc ("IBM2");
785
- IndexBinaryIDMap * idxmap = is_map2 ?
786
- new IndexBinaryIDMap2 () : new IndexBinaryIDMap ();
787
- read_index_binary_header (idxmap, f);
788
- idxmap->index = read_index_binary (f, io_flags);
852
+ } else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
853
+ bool is_map2 = h == fourcc("IBM2");
854
+ IndexBinaryIDMap* idxmap =
855
+ is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap();
856
+ read_index_binary_header(idxmap, f);
857
+ idxmap->index = read_index_binary(f, io_flags);
789
858
  idxmap->own_fields = true;
790
- READVECTOR (idxmap->id_map);
859
+ READVECTOR(idxmap->id_map);
791
860
  if (is_map2) {
792
- static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map ();
861
+ static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map();
793
862
  }
794
863
  idx = idxmap;
795
- } else if(h == fourcc("IBHh")) {
796
- IndexBinaryHash *idxh = new IndexBinaryHash ();
797
- read_index_binary_header (idxh, f);
798
- READ1 (idxh->b);
799
- READ1 (idxh->nflip);
864
+ } else if (h == fourcc("IBHh")) {
865
+ IndexBinaryHash* idxh = new IndexBinaryHash();
866
+ read_index_binary_header(idxh, f);
867
+ READ1(idxh->b);
868
+ READ1(idxh->nflip);
800
869
  read_binary_hash_invlists(idxh->invlists, idxh->b, f);
801
870
  idx = idxh;
802
- } else if(h == fourcc("IBHm")) {
803
- IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash ();
804
- read_index_binary_header (idxmh, f);
805
- idxmh->storage = dynamic_cast<IndexBinaryFlat*> (read_index_binary (f));
806
- FAISS_THROW_IF_NOT(idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
871
+ } else if (h == fourcc("IBHm")) {
872
+ IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash();
873
+ read_index_binary_header(idxmh, f);
874
+ idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f));
875
+ FAISS_THROW_IF_NOT(
876
+ idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
807
877
  idxmh->own_fields = true;
808
- READ1 (idxmh->b);
809
- READ1 (idxmh->nhash);
810
- READ1 (idxmh->nflip);
811
- idxmh->maps.resize (idxmh->nhash);
878
+ READ1(idxmh->b);
879
+ READ1(idxmh->nhash);
880
+ READ1(idxmh->nflip);
881
+ idxmh->maps.resize(idxmh->nhash);
812
882
  for (int i = 0; i < idxmh->nhash; i++) {
813
883
  read_binary_multi_hash_map(
814
884
  idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
@@ -816,25 +886,23 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
816
886
  idx = idxmh;
817
887
  } else {
818
888
  FAISS_THROW_FMT(
819
- "Index type %08x (\"%s\") not recognized",
820
- h, fourcc_inv_printable(h).c_str()
821
- );
889
+ "Index type %08x (\"%s\") not recognized",
890
+ h,
891
+ fourcc_inv_printable(h).c_str());
822
892
  idx = nullptr;
823
893
  }
824
894
  return idx;
825
895
  }
826
896
 
827
- IndexBinary *read_index_binary (FILE * f, int io_flags) {
897
+ IndexBinary* read_index_binary(FILE* f, int io_flags) {
828
898
  FileIOReader reader(f);
829
899
  return read_index_binary(&reader, io_flags);
830
900
  }
831
901
 
832
- IndexBinary *read_index_binary (const char *fname, int io_flags) {
902
+ IndexBinary* read_index_binary(const char* fname, int io_flags) {
833
903
  FileIOReader reader(fname);
834
- IndexBinary *idx = read_index_binary (&reader, io_flags);
904
+ IndexBinary* idx = read_index_binary(&reader, io_flags);
835
905
  return idx;
836
906
  }
837
907
 
838
-
839
-
840
908
  } // namespace faiss