faiss 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -9,11 +9,13 @@
9
9
 
10
10
  #include <faiss/index_io.h>
11
11
 
12
+ #include <faiss/impl/io_macros.h>
13
+
12
14
  #include <cstdio>
13
15
  #include <cstdlib>
14
16
 
15
- #include <sys/types.h>
16
17
  #include <sys/stat.h>
18
+ #include <sys/types.h>
17
19
 
18
20
  #include <faiss/impl/FaissAssert.h>
19
21
  #include <faiss/impl/io.h>
@@ -22,343 +24,434 @@
22
24
 
23
25
  #include <faiss/invlists/InvertedListsIOHook.h>
24
26
 
27
+ #include <faiss/Index2Layer.h>
28
+ #include <faiss/IndexAdditiveQuantizer.h>
25
29
  #include <faiss/IndexFlat.h>
26
- #include <faiss/VectorTransform.h>
27
- #include <faiss/IndexPreTransform.h>
28
- #include <faiss/IndexLSH.h>
29
- #include <faiss/IndexPQ.h>
30
+ #include <faiss/IndexHNSW.h>
30
31
  #include <faiss/IndexIVF.h>
32
+ #include <faiss/IndexIVFAdditiveQuantizer.h>
33
+ #include <faiss/IndexIVFFlat.h>
31
34
  #include <faiss/IndexIVFPQ.h>
35
+ #include <faiss/IndexIVFPQFastScan.h>
32
36
  #include <faiss/IndexIVFPQR.h>
33
- #include <faiss/Index2Layer.h>
34
- #include <faiss/IndexIVFFlat.h>
35
37
  #include <faiss/IndexIVFSpectralHash.h>
36
- #include <faiss/MetaIndexes.h>
37
- #include <faiss/IndexScalarQuantizer.h>
38
- #include <faiss/IndexHNSW.h>
38
+ #include <faiss/IndexLSH.h>
39
39
  #include <faiss/IndexLattice.h>
40
+ #include <faiss/IndexNSG.h>
41
+ #include <faiss/IndexPQ.h>
40
42
  #include <faiss/IndexPQFastScan.h>
41
- #include <faiss/IndexIVFPQFastScan.h>
43
+ #include <faiss/IndexPreTransform.h>
42
44
  #include <faiss/IndexRefine.h>
45
+ #include <faiss/IndexScalarQuantizer.h>
46
+ #include <faiss/MetaIndexes.h>
47
+ #include <faiss/VectorTransform.h>
43
48
 
44
49
  #include <faiss/IndexBinaryFlat.h>
45
50
  #include <faiss/IndexBinaryFromFloat.h>
46
51
  #include <faiss/IndexBinaryHNSW.h>
47
- #include <faiss/IndexBinaryIVF.h>
48
52
  #include <faiss/IndexBinaryHash.h>
53
+ #include <faiss/IndexBinaryIVF.h>
49
54
 
50
55
  namespace faiss {
51
56
 
52
-
53
57
  /*************************************************************
54
58
  * Read
55
59
  **************************************************************/
56
60
 
57
- static void read_index_header (Index *idx, IOReader *f) {
58
- READ1 (idx->d);
59
- READ1 (idx->ntotal);
61
+ static void read_index_header(Index* idx, IOReader* f) {
62
+ READ1(idx->d);
63
+ READ1(idx->ntotal);
60
64
  Index::idx_t dummy;
61
- READ1 (dummy);
62
- READ1 (dummy);
63
- READ1 (idx->is_trained);
64
- READ1 (idx->metric_type);
65
+ READ1(dummy);
66
+ READ1(dummy);
67
+ READ1(idx->is_trained);
68
+ READ1(idx->metric_type);
65
69
  if (idx->metric_type > 1) {
66
- READ1 (idx->metric_arg);
70
+ READ1(idx->metric_arg);
67
71
  }
68
72
  idx->verbose = false;
69
73
  }
70
74
 
71
- VectorTransform* read_VectorTransform (IOReader *f) {
75
+ VectorTransform* read_VectorTransform(IOReader* f) {
72
76
  uint32_t h;
73
- READ1 (h);
74
- VectorTransform *vt = nullptr;
75
-
76
- if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
77
- h == fourcc ("LTra") || h == fourcc ("PcAm") ||
78
- h == fourcc ("Viqm")) {
79
- LinearTransform *lt = nullptr;
80
- if (h == fourcc ("rrot")) {
81
- lt = new RandomRotationMatrix ();
82
- } else if (h == fourcc ("PCAm") ||
83
- h == fourcc ("PcAm")) {
84
- PCAMatrix * pca = new PCAMatrix ();
85
- READ1 (pca->eigen_power);
86
- READ1 (pca->random_rotation);
87
- if (h == fourcc ("PcAm"))
88
- READ1 (pca->balanced_bins);
89
- READVECTOR (pca->mean);
90
- READVECTOR (pca->eigenvalues);
91
- READVECTOR (pca->PCAMat);
77
+ READ1(h);
78
+ VectorTransform* vt = nullptr;
79
+
80
+ if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
81
+ h == fourcc("PcAm") || h == fourcc("Viqm") || h == fourcc("Pcam")) {
82
+ LinearTransform* lt = nullptr;
83
+ if (h == fourcc("rrot")) {
84
+ lt = new RandomRotationMatrix();
85
+ } else if (
86
+ h == fourcc("PCAm") || h == fourcc("PcAm") ||
87
+ h == fourcc("Pcam")) {
88
+ PCAMatrix* pca = new PCAMatrix();
89
+ READ1(pca->eigen_power);
90
+ if (h == fourcc("Pcam")) {
91
+ READ1(pca->epsilon);
92
+ }
93
+ READ1(pca->random_rotation);
94
+ if (h != fourcc("PCAm")) {
95
+ READ1(pca->balanced_bins);
96
+ }
97
+ READVECTOR(pca->mean);
98
+ READVECTOR(pca->eigenvalues);
99
+ READVECTOR(pca->PCAMat);
92
100
  lt = pca;
93
- } else if (h == fourcc ("Viqm")) {
94
- ITQMatrix *itqm = new ITQMatrix ();
95
- READ1 (itqm->max_iter);
96
- READ1 (itqm->seed);
101
+ } else if (h == fourcc("Viqm")) {
102
+ ITQMatrix* itqm = new ITQMatrix();
103
+ READ1(itqm->max_iter);
104
+ READ1(itqm->seed);
97
105
  lt = itqm;
98
- } else if (h == fourcc ("LTra")) {
99
- lt = new LinearTransform ();
106
+ } else if (h == fourcc("LTra")) {
107
+ lt = new LinearTransform();
100
108
  }
101
- READ1 (lt->have_bias);
102
- READVECTOR (lt->A);
103
- READVECTOR (lt->b);
104
- FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
105
- FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
109
+ READ1(lt->have_bias);
110
+ READVECTOR(lt->A);
111
+ READVECTOR(lt->b);
112
+ FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out);
113
+ FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out);
106
114
  lt->set_is_orthonormal();
107
115
  vt = lt;
108
- } else if (h == fourcc ("RmDT")) {
109
- RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
110
- READVECTOR (rdt->map);
116
+ } else if (h == fourcc("RmDT")) {
117
+ RemapDimensionsTransform* rdt = new RemapDimensionsTransform();
118
+ READVECTOR(rdt->map);
111
119
  vt = rdt;
112
- } else if (h == fourcc ("VNrm")) {
113
- NormalizationTransform *nt = new NormalizationTransform ();
114
- READ1 (nt->norm);
120
+ } else if (h == fourcc("VNrm")) {
121
+ NormalizationTransform* nt = new NormalizationTransform();
122
+ READ1(nt->norm);
115
123
  vt = nt;
116
- } else if (h == fourcc ("VCnt")) {
117
- CenteringTransform *ct = new CenteringTransform ();
118
- READVECTOR (ct->mean);
124
+ } else if (h == fourcc("VCnt")) {
125
+ CenteringTransform* ct = new CenteringTransform();
126
+ READVECTOR(ct->mean);
119
127
  vt = ct;
120
- } else if (h == fourcc ("Viqt")) {
121
- ITQTransform *itqt = new ITQTransform ();
128
+ } else if (h == fourcc("Viqt")) {
129
+ ITQTransform* itqt = new ITQTransform();
122
130
 
123
- READVECTOR (itqt->mean);
124
- READ1 (itqt->do_pca);
131
+ READVECTOR(itqt->mean);
132
+ READ1(itqt->do_pca);
125
133
  {
126
- ITQMatrix *itqm = dynamic_cast<ITQMatrix*>
127
- (read_VectorTransform (f));
134
+ ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f));
128
135
  FAISS_THROW_IF_NOT(itqm);
129
136
  itqt->itq = *itqm;
130
137
  delete itqm;
131
138
  }
132
139
  {
133
- LinearTransform *pi = dynamic_cast<LinearTransform*>
134
- (read_VectorTransform (f));
135
- FAISS_THROW_IF_NOT (pi);
140
+ LinearTransform* pi =
141
+ dynamic_cast<LinearTransform*>(read_VectorTransform(f));
142
+ FAISS_THROW_IF_NOT(pi);
136
143
  itqt->pca_then_itq = *pi;
137
144
  delete pi;
138
145
  }
139
146
  vt = itqt;
140
147
  } else {
141
148
  FAISS_THROW_FMT(
142
- "fourcc %ud (\"%s\") not recognized",
143
- h, fourcc_inv_printable(h).c_str()
144
- );
149
+ "fourcc %ud (\"%s\") not recognized in %s",
150
+ h,
151
+ fourcc_inv_printable(h).c_str(),
152
+ f->name.c_str());
145
153
  }
146
- READ1 (vt->d_in);
147
- READ1 (vt->d_out);
148
- READ1 (vt->is_trained);
154
+ READ1(vt->d_in);
155
+ READ1(vt->d_out);
156
+ READ1(vt->is_trained);
149
157
  return vt;
150
158
  }
151
159
 
152
-
153
- static void read_ArrayInvertedLists_sizes (
154
- IOReader *f, std::vector<size_t> & sizes)
155
- {
160
+ static void read_ArrayInvertedLists_sizes(
161
+ IOReader* f,
162
+ std::vector<size_t>& sizes) {
156
163
  uint32_t list_type;
157
164
  READ1(list_type);
158
165
  if (list_type == fourcc("full")) {
159
166
  size_t os = sizes.size();
160
- READVECTOR (sizes);
161
- FAISS_THROW_IF_NOT (os == sizes.size());
167
+ READVECTOR(sizes);
168
+ FAISS_THROW_IF_NOT(os == sizes.size());
162
169
  } else if (list_type == fourcc("sprs")) {
163
170
  std::vector<size_t> idsizes;
164
- READVECTOR (idsizes);
171
+ READVECTOR(idsizes);
165
172
  for (size_t j = 0; j < idsizes.size(); j += 2) {
166
- FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
173
+ FAISS_THROW_IF_NOT(idsizes[j] < sizes.size());
167
174
  sizes[idsizes[j]] = idsizes[j + 1];
168
175
  }
169
176
  } else {
170
177
  FAISS_THROW_FMT(
171
- "list_type %ud (\"%s\") not recognized",
172
- list_type, fourcc_inv_printable(list_type).c_str()
173
- );
178
+ "list_type %ud (\"%s\") not recognized",
179
+ list_type,
180
+ fourcc_inv_printable(list_type).c_str());
174
181
  }
175
182
  }
176
183
 
177
- InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
184
+ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
178
185
  uint32_t h;
179
- READ1 (h);
180
- if (h == fourcc ("il00")) {
181
- fprintf(stderr, "read_InvertedLists:"
186
+ READ1(h);
187
+ if (h == fourcc("il00")) {
188
+ fprintf(stderr,
189
+ "read_InvertedLists:"
182
190
  " WARN! inverted lists not stored with IVF object\n");
183
191
  return nullptr;
184
- } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
185
- auto ails = new ArrayInvertedLists (0, 0);
186
- READ1 (ails->nlist);
187
- READ1 (ails->code_size);
188
- ails->ids.resize (ails->nlist);
189
- ails->codes.resize (ails->nlist);
190
- std::vector<size_t> sizes (ails->nlist);
191
- read_ArrayInvertedLists_sizes (f, sizes);
192
+ } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
193
+ auto ails = new ArrayInvertedLists(0, 0);
194
+ READ1(ails->nlist);
195
+ READ1(ails->code_size);
196
+ ails->ids.resize(ails->nlist);
197
+ ails->codes.resize(ails->nlist);
198
+ std::vector<size_t> sizes(ails->nlist);
199
+ read_ArrayInvertedLists_sizes(f, sizes);
192
200
  for (size_t i = 0; i < ails->nlist; i++) {
193
- ails->ids[i].resize (sizes[i]);
194
- ails->codes[i].resize (sizes[i] * ails->code_size);
201
+ ails->ids[i].resize(sizes[i]);
202
+ ails->codes[i].resize(sizes[i] * ails->code_size);
195
203
  }
196
204
  for (size_t i = 0; i < ails->nlist; i++) {
197
205
  size_t n = ails->ids[i].size();
198
206
  if (n > 0) {
199
- READANDCHECK (ails->codes[i].data(), n * ails->code_size);
200
- READANDCHECK (ails->ids[i].data(), n);
207
+ READANDCHECK(ails->codes[i].data(), n * ails->code_size);
208
+ READANDCHECK(ails->ids[i].data(), n);
201
209
  }
202
210
  }
203
211
  return ails;
204
212
 
205
- } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
206
- // code is always ilxx where xx is specific to the type of invlists we want
207
- // so we get the 16 high bits from the io_flag and the 16 low bits as "il"
213
+ } else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
214
+ // code is always ilxx where xx is specific to the type of invlists we
215
+ // want so we get the 16 high bits from the io_flag and the 16 low bits
216
+ // as "il"
208
217
  int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
209
218
  size_t nlist, code_size;
210
- READ1 (nlist);
211
- READ1 (code_size);
212
- std::vector<size_t> sizes (nlist);
213
- read_ArrayInvertedLists_sizes (f, sizes);
219
+ READ1(nlist);
220
+ READ1(code_size);
221
+ std::vector<size_t> sizes(nlist);
222
+ read_ArrayInvertedLists_sizes(f, sizes);
214
223
  return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
215
224
  f, io_flags, nlist, code_size, sizes);
216
225
  } else {
217
226
  return InvertedListsIOHook::lookup(h)->read(f, io_flags);
218
227
  }
219
-
220
228
  }
221
229
 
222
-
223
- static void read_InvertedLists (
224
- IndexIVF *ivf, IOReader *f, int io_flags) {
225
- InvertedLists *ils = read_InvertedLists (f, io_flags);
230
+ static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
231
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
226
232
  if (ils) {
227
- FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
228
- FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
229
- ils->code_size == ivf->code_size);
233
+ FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
234
+ FAISS_THROW_IF_NOT(
235
+ ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
236
+ ils->code_size == ivf->code_size);
230
237
  }
231
238
  ivf->invlists = ils;
232
239
  ivf->own_invlists = true;
233
240
  }
234
241
 
235
- static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
236
- READ1 (pq->d);
237
- READ1 (pq->M);
238
- READ1 (pq->nbits);
239
- pq->set_derived_values ();
240
- READVECTOR (pq->centroids);
242
+ static void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
243
+ READ1(pq->d);
244
+ READ1(pq->M);
245
+ READ1(pq->nbits);
246
+ pq->set_derived_values();
247
+ READVECTOR(pq->centroids);
241
248
  }
242
249
 
243
- static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
244
- READ1 (ivsc->qtype);
245
- READ1 (ivsc->rangestat);
246
- READ1 (ivsc->rangestat_arg);
247
- READ1 (ivsc->d);
248
- READ1 (ivsc->code_size);
249
- READVECTOR (ivsc->trained);
250
- ivsc->set_derived_sizes ();
250
+ static void read_ResidualQuantizer_old(ResidualQuantizer* rq, IOReader* f) {
251
+ READ1(rq->d);
252
+ READ1(rq->M);
253
+ READVECTOR(rq->nbits);
254
+ READ1(rq->is_trained);
255
+ READ1(rq->train_type);
256
+ READ1(rq->max_beam_size);
257
+ READVECTOR(rq->codebooks);
258
+ READ1(rq->search_type);
259
+ READ1(rq->norm_min);
260
+ READ1(rq->norm_max);
261
+ rq->set_derived_values();
251
262
  }
252
263
 
264
+ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
265
+ READ1(aq->d);
266
+ READ1(aq->M);
267
+ READVECTOR(aq->nbits);
268
+ READ1(aq->is_trained);
269
+ READVECTOR(aq->codebooks);
270
+ READ1(aq->search_type);
271
+ READ1(aq->norm_min);
272
+ READ1(aq->norm_max);
273
+ if (aq->search_type == AdditiveQuantizer::ST_norm_cqint8 ||
274
+ aq->search_type == AdditiveQuantizer::ST_norm_cqint4) {
275
+ READXBVECTOR(aq->qnorm.codes);
276
+ }
277
+ aq->set_derived_values();
278
+ }
279
+
280
+ static void read_ResidualQuantizer(ResidualQuantizer* rq, IOReader* f) {
281
+ read_AdditiveQuantizer(rq, f);
282
+ READ1(rq->train_type);
283
+ READ1(rq->max_beam_size);
284
+ if (!(rq->train_type & ResidualQuantizer::Skip_codebook_tables)) {
285
+ rq->compute_codebook_tables();
286
+ }
287
+ }
288
+
289
+ static void read_LocalSearchQuantizer(LocalSearchQuantizer* lsq, IOReader* f) {
290
+ read_AdditiveQuantizer(lsq, f);
291
+ READ1(lsq->K);
292
+ READ1(lsq->train_iters);
293
+ READ1(lsq->encode_ils_iters);
294
+ READ1(lsq->train_ils_iters);
295
+ READ1(lsq->icm_iters);
296
+ READ1(lsq->p);
297
+ READ1(lsq->lambd);
298
+ READ1(lsq->chunk_size);
299
+ READ1(lsq->random_seed);
300
+ READ1(lsq->nperts);
301
+ READ1(lsq->update_codebooks_with_double);
302
+ }
303
+
304
+ static void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
305
+ READ1(ivsc->qtype);
306
+ READ1(ivsc->rangestat);
307
+ READ1(ivsc->rangestat_arg);
308
+ READ1(ivsc->d);
309
+ READ1(ivsc->code_size);
310
+ READVECTOR(ivsc->trained);
311
+ ivsc->set_derived_sizes();
312
+ }
253
313
 
254
- static void read_HNSW (HNSW *hnsw, IOReader *f) {
255
- READVECTOR (hnsw->assign_probas);
256
- READVECTOR (hnsw->cum_nneighbor_per_level);
257
- READVECTOR (hnsw->levels);
258
- READVECTOR (hnsw->offsets);
259
- READVECTOR (hnsw->neighbors);
314
+ static void read_HNSW(HNSW* hnsw, IOReader* f) {
315
+ READVECTOR(hnsw->assign_probas);
316
+ READVECTOR(hnsw->cum_nneighbor_per_level);
317
+ READVECTOR(hnsw->levels);
318
+ READVECTOR(hnsw->offsets);
319
+ READVECTOR(hnsw->neighbors);
320
+
321
+ READ1(hnsw->entry_point);
322
+ READ1(hnsw->max_level);
323
+ READ1(hnsw->efConstruction);
324
+ READ1(hnsw->efSearch);
325
+ READ1(hnsw->upper_beam);
326
+ }
327
+
328
+ static void read_NSG(NSG* nsg, IOReader* f) {
329
+ READ1(nsg->ntotal);
330
+ READ1(nsg->R);
331
+ READ1(nsg->L);
332
+ READ1(nsg->C);
333
+ READ1(nsg->search_L);
334
+ READ1(nsg->enterpoint);
335
+ READ1(nsg->is_built);
336
+
337
+ if (!nsg->is_built) {
338
+ return;
339
+ }
260
340
 
261
- READ1 (hnsw->entry_point);
262
- READ1 (hnsw->max_level);
263
- READ1 (hnsw->efConstruction);
264
- READ1 (hnsw->efSearch);
265
- READ1 (hnsw->upper_beam);
341
+ constexpr int EMPTY_ID = -1;
342
+ int N = nsg->ntotal;
343
+ int R = nsg->R;
344
+ auto& graph = nsg->final_graph;
345
+ graph = std::make_shared<nsg::Graph<int>>(N, R);
346
+ std::fill_n(graph->data, N * R, EMPTY_ID);
347
+
348
+ int size = 0;
349
+
350
+ for (int i = 0; i < N; i++) {
351
+ for (int j = 0; j < R + 1; j++) {
352
+ int id;
353
+ READ1(id);
354
+ if (id != EMPTY_ID) {
355
+ graph->at(i, j) = id;
356
+ size += 1;
357
+ } else {
358
+ break;
359
+ }
360
+ }
361
+ }
266
362
  }
267
363
 
268
- ProductQuantizer * read_ProductQuantizer (const char*fname) {
364
+ ProductQuantizer* read_ProductQuantizer(const char* fname) {
269
365
  FileIOReader reader(fname);
270
366
  return read_ProductQuantizer(&reader);
271
367
  }
272
368
 
273
- ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
274
- ProductQuantizer *pq = new ProductQuantizer();
275
- ScopeDeleter1<ProductQuantizer> del (pq);
369
+ ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
370
+ ProductQuantizer* pq = new ProductQuantizer();
371
+ ScopeDeleter1<ProductQuantizer> del(pq);
276
372
 
277
- read_ProductQuantizer(pq, reader);
278
- del.release ();
279
- return pq;
373
+ read_ProductQuantizer(pq, reader);
374
+ del.release();
375
+ return pq;
280
376
  }
281
377
 
282
- static void read_direct_map (DirectMap *dm, IOReader *f) {
378
+ static void read_direct_map(DirectMap* dm, IOReader* f) {
283
379
  char maintain_direct_map;
284
- READ1 (maintain_direct_map);
380
+ READ1(maintain_direct_map);
285
381
  dm->type = (DirectMap::Type)maintain_direct_map;
286
- READVECTOR (dm->array);
382
+ READVECTOR(dm->array);
287
383
  if (dm->type == DirectMap::Hashtable) {
288
384
  using idx_t = Index::idx_t;
289
385
  std::vector<std::pair<idx_t, idx_t>> v;
290
- READVECTOR (v);
291
- std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
292
- map.reserve (v.size());
293
- for (auto it: v) {
294
- map [it.first] = it.second;
386
+ READVECTOR(v);
387
+ std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
388
+ map.reserve(v.size());
389
+ for (auto it : v) {
390
+ map[it.first] = it.second;
295
391
  }
296
392
  }
297
-
298
393
  }
299
394
 
300
-
301
- static void read_ivf_header (
302
- IndexIVF *ivf, IOReader *f,
303
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
304
- {
305
- read_index_header (ivf, f);
306
- READ1 (ivf->nlist);
307
- READ1 (ivf->nprobe);
308
- ivf->quantizer = read_index (f);
395
+ static void read_ivf_header(
396
+ IndexIVF* ivf,
397
+ IOReader* f,
398
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
399
+ read_index_header(ivf, f);
400
+ READ1(ivf->nlist);
401
+ READ1(ivf->nprobe);
402
+ ivf->quantizer = read_index(f);
309
403
  ivf->own_fields = true;
310
404
  if (ids) { // used in legacy "Iv" formats
311
- ids->resize (ivf->nlist);
405
+ ids->resize(ivf->nlist);
312
406
  for (size_t i = 0; i < ivf->nlist; i++)
313
- READVECTOR ((*ids)[i]);
407
+ READVECTOR((*ids)[i]);
314
408
  }
315
- read_direct_map (&ivf->direct_map, f);
409
+ read_direct_map(&ivf->direct_map, f);
316
410
  }
317
411
 
318
412
  // used for legacy formats
319
- static ArrayInvertedLists *set_array_invlist(
320
- IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
321
- {
322
- ArrayInvertedLists *ail = new ArrayInvertedLists (
323
- ivf->nlist, ivf->code_size);
324
- std::swap (ail->ids, ids);
413
+ static ArrayInvertedLists* set_array_invlist(
414
+ IndexIVF* ivf,
415
+ std::vector<std::vector<Index::idx_t>>& ids) {
416
+ ArrayInvertedLists* ail =
417
+ new ArrayInvertedLists(ivf->nlist, ivf->code_size);
418
+ std::swap(ail->ids, ids);
325
419
  ivf->invlists = ail;
326
420
  ivf->own_invlists = true;
327
421
  return ail;
328
422
  }
329
423
 
330
- static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
331
- {
332
- bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
424
+ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
425
+ bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ");
333
426
 
334
- IndexIVFPQR *ivfpqr =
335
- h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
336
- new IndexIVFPQR () : nullptr;
337
- IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
427
+ IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR")
428
+ ? new IndexIVFPQR()
429
+ : nullptr;
430
+ IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
338
431
 
339
- std::vector<std::vector<Index::idx_t> > ids;
340
- read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
341
- READ1 (ivpq->by_residual);
342
- READ1 (ivpq->code_size);
343
- read_ProductQuantizer (&ivpq->pq, f);
432
+ std::vector<std::vector<Index::idx_t>> ids;
433
+ read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
434
+ READ1(ivpq->by_residual);
435
+ READ1(ivpq->code_size);
436
+ read_ProductQuantizer(&ivpq->pq, f);
344
437
 
345
438
  if (legacy) {
346
- ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
439
+ ArrayInvertedLists* ail = set_array_invlist(ivpq, ids);
347
440
  for (size_t i = 0; i < ail->nlist; i++)
348
- READVECTOR (ail->codes[i]);
441
+ READVECTOR(ail->codes[i]);
349
442
  } else {
350
- read_InvertedLists (ivpq, f, io_flags);
443
+ read_InvertedLists(ivpq, f, io_flags);
351
444
  }
352
445
 
353
446
  if (ivpq->is_trained) {
354
447
  // precomputed table not stored. It is cheaper to recompute it
355
448
  ivpq->use_precomputed_table = 0;
356
449
  if (ivpq->by_residual)
357
- ivpq->precompute_table ();
450
+ ivpq->precompute_table();
358
451
  if (ivfpqr) {
359
- read_ProductQuantizer (&ivfpqr->refine_pq, f);
360
- READVECTOR (ivfpqr->refine_codes);
361
- READ1 (ivfpqr->k_factor);
452
+ read_ProductQuantizer(&ivfpqr->refine_pq, f);
453
+ READVECTOR(ivfpqr->refine_codes);
454
+ READ1(ivfpqr->k_factor);
362
455
  }
363
456
  }
364
457
  return ivpq;
@@ -366,200 +459,248 @@ static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
366
459
 
367
460
  int read_old_fmt_hack = 0;
368
461
 
369
- Index *read_index (IOReader *f, int io_flags) {
370
- Index * idx = nullptr;
462
+ Index* read_index(IOReader* f, int io_flags) {
463
+ Index* idx = nullptr;
371
464
  uint32_t h;
372
- READ1 (h);
373
- if (h == fourcc ("IxFI") || h == fourcc ("IxF2") || h == fourcc("IxFl")) {
374
- IndexFlat *idxf;
375
- if (h == fourcc ("IxFI")) {
376
- idxf = new IndexFlatIP ();
465
+ READ1(h);
466
+ if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
467
+ IndexFlat* idxf;
468
+ if (h == fourcc("IxFI")) {
469
+ idxf = new IndexFlatIP();
377
470
  } else if (h == fourcc("IxF2")) {
378
- idxf = new IndexFlatL2 ();
471
+ idxf = new IndexFlatL2();
379
472
  } else {
380
- idxf = new IndexFlat ();
473
+ idxf = new IndexFlat();
381
474
  }
382
- read_index_header (idxf, f);
383
- READVECTOR (idxf->xb);
384
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
475
+ read_index_header(idxf, f);
476
+ idxf->code_size = idxf->d * sizeof(float);
477
+ READXBVECTOR(idxf->codes);
478
+ FAISS_THROW_IF_NOT(
479
+ idxf->codes.size() == idxf->ntotal * idxf->code_size);
385
480
  // leak!
386
481
  idx = idxf;
387
482
  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
388
- IndexLSH * idxl = new IndexLSH ();
389
- read_index_header (idxl, f);
390
- READ1 (idxl->nbits);
391
- READ1 (idxl->rotate_data);
392
- READ1 (idxl->train_thresholds);
393
- READVECTOR (idxl->thresholds);
394
- READ1 (idxl->bytes_per_vec);
483
+ IndexLSH* idxl = new IndexLSH();
484
+ read_index_header(idxl, f);
485
+ READ1(idxl->nbits);
486
+ READ1(idxl->rotate_data);
487
+ READ1(idxl->train_thresholds);
488
+ READVECTOR(idxl->thresholds);
489
+ int code_size_i;
490
+ READ1(code_size_i);
491
+ idxl->code_size = code_size_i;
395
492
  if (h == fourcc("IxHE")) {
396
- FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
397
- "can only read old format IndexLSH with "
398
- "nbits multiple of 64 (got %d)",
399
- (int) idxl->nbits);
493
+ FAISS_THROW_IF_NOT_FMT(
494
+ idxl->nbits % 64 == 0,
495
+ "can only read old format IndexLSH with "
496
+ "nbits multiple of 64 (got %d)",
497
+ (int)idxl->nbits);
400
498
  // leak
401
- idxl->bytes_per_vec *= 8;
499
+ idxl->code_size *= 8;
402
500
  }
403
501
  {
404
- RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
405
- (read_VectorTransform (f));
502
+ RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>(
503
+ read_VectorTransform(f));
406
504
  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
407
505
  idxl->rrot = *rrot;
408
506
  delete rrot;
409
507
  }
410
- READVECTOR (idxl->codes);
411
- FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
412
- idxl->rrot.d_out == idxl->nbits);
413
- FAISS_THROW_IF_NOT (
414
- idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
508
+ READVECTOR(idxl->codes);
509
+ FAISS_THROW_IF_NOT(
510
+ idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
511
+ FAISS_THROW_IF_NOT(
512
+ idxl->codes.size() == idxl->ntotal * idxl->code_size);
415
513
  idx = idxl;
416
- } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
417
- h == fourcc ("IxPq")) {
514
+ } else if (
515
+ h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) {
418
516
  // IxPQ and IxPo were merged into the same IndexPQ object
419
- IndexPQ * idxp =new IndexPQ ();
420
- read_index_header (idxp, f);
421
- read_ProductQuantizer (&idxp->pq, f);
422
- READVECTOR (idxp->codes);
423
- if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
424
- READ1 (idxp->search_type);
425
- READ1 (idxp->encode_signs);
426
- READ1 (idxp->polysemous_ht);
517
+ IndexPQ* idxp = new IndexPQ();
518
+ read_index_header(idxp, f);
519
+ read_ProductQuantizer(&idxp->pq, f);
520
+ idxp->code_size = idxp->pq.code_size;
521
+ READVECTOR(idxp->codes);
522
+ if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
523
+ READ1(idxp->search_type);
524
+ READ1(idxp->encode_signs);
525
+ READ1(idxp->polysemous_ht);
427
526
  }
428
527
  // Old versions of PQ all had metric_type set to INNER_PRODUCT
429
528
  // when they were in fact using L2. Therefore, we force metric type
430
529
  // to L2 when the old format is detected
431
- if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
530
+ if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
432
531
  idxp->metric_type = METRIC_L2;
433
532
  }
434
533
  idx = idxp;
435
- } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
436
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
437
- std::vector<std::vector<Index::idx_t> > ids;
438
- read_ivf_header (ivfl, f, &ids);
534
+ } else if (h == fourcc("IxRQ") || h == fourcc("IxRq")) {
535
+ IndexResidualQuantizer* idxr = new IndexResidualQuantizer();
536
+ read_index_header(idxr, f);
537
+ if (h == fourcc("IxRQ")) {
538
+ read_ResidualQuantizer_old(&idxr->rq, f);
539
+ } else {
540
+ read_ResidualQuantizer(&idxr->rq, f);
541
+ }
542
+ READ1(idxr->code_size);
543
+ READVECTOR(idxr->codes);
544
+ idx = idxr;
545
+ } else if (h == fourcc("IxLS")) {
546
+ auto idxr = new IndexLocalSearchQuantizer();
547
+ read_index_header(idxr, f);
548
+ read_LocalSearchQuantizer(&idxr->lsq, f);
549
+ READ1(idxr->code_size);
550
+ READVECTOR(idxr->codes);
551
+ idx = idxr;
552
+ } else if (h == fourcc("ImRQ")) {
553
+ ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
554
+ read_index_header(idxr, f);
555
+ read_ResidualQuantizer(&idxr->rq, f);
556
+ READ1(idxr->beam_factor);
557
+ idxr->set_beam_factor(idxr->beam_factor);
558
+ idx = idxr;
559
+ } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
560
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
561
+ std::vector<std::vector<Index::idx_t>> ids;
562
+ read_ivf_header(ivfl, f, &ids);
439
563
  ivfl->code_size = ivfl->d * sizeof(float);
440
- ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
564
+ ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
441
565
 
442
- if (h == fourcc ("IvFL")) {
566
+ if (h == fourcc("IvFL")) {
443
567
  for (size_t i = 0; i < ivfl->nlist; i++) {
444
- READVECTOR (ail->codes[i]);
568
+ READVECTOR(ail->codes[i]);
445
569
  }
446
570
  } else { // old format
447
571
  for (size_t i = 0; i < ivfl->nlist; i++) {
448
572
  std::vector<float> vec;
449
- READVECTOR (vec);
573
+ READVECTOR(vec);
450
574
  ail->codes[i].resize(vec.size() * sizeof(float));
451
- memcpy(ail->codes[i].data(), vec.data(),
452
- ail->codes[i].size());
575
+ memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size());
453
576
  }
454
577
  }
455
578
  idx = ivfl;
456
- } else if (h == fourcc ("IwFd")) {
457
- IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
458
- read_ivf_header (ivfl, f);
579
+ } else if (h == fourcc("IwFd")) {
580
+ IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup();
581
+ read_ivf_header(ivfl, f);
459
582
  ivfl->code_size = ivfl->d * sizeof(float);
460
583
  {
461
584
  std::vector<Index::idx_t> tab;
462
- READVECTOR (tab);
585
+ READVECTOR(tab);
463
586
  for (long i = 0; i < tab.size(); i += 2) {
464
- std::pair<Index::idx_t, Index::idx_t>
465
- pair (tab[i], tab[i + 1]);
466
- ivfl->instances.insert (pair);
587
+ std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
588
+ ivfl->instances.insert(pair);
467
589
  }
468
590
  }
469
- read_InvertedLists (ivfl, f, io_flags);
591
+ read_InvertedLists(ivfl, f, io_flags);
470
592
  idx = ivfl;
471
- } else if (h == fourcc ("IwFl")) {
472
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
473
- read_ivf_header (ivfl, f);
593
+ } else if (h == fourcc("IwFl")) {
594
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
595
+ read_ivf_header(ivfl, f);
474
596
  ivfl->code_size = ivfl->d * sizeof(float);
475
- read_InvertedLists (ivfl, f, io_flags);
597
+ read_InvertedLists(ivfl, f, io_flags);
476
598
  idx = ivfl;
477
- } else if (h == fourcc ("IxSQ")) {
478
- IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
479
- read_index_header (idxs, f);
480
- read_ScalarQuantizer (&idxs->sq, f);
481
- READVECTOR (idxs->codes);
599
+ } else if (h == fourcc("IxSQ")) {
600
+ IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
601
+ read_index_header(idxs, f);
602
+ read_ScalarQuantizer(&idxs->sq, f);
603
+ READVECTOR(idxs->codes);
482
604
  idxs->code_size = idxs->sq.code_size;
483
605
  idx = idxs;
484
- } else if (h == fourcc ("IxLa")) {
606
+ } else if (h == fourcc("IxLa")) {
485
607
  int d, nsq, scale_nbit, r2;
486
- READ1 (d);
487
- READ1 (nsq);
488
- READ1 (scale_nbit);
489
- READ1 (r2);
490
- IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2);
491
- read_index_header (idxl, f);
492
- READVECTOR (idxl->trained);
608
+ READ1(d);
609
+ READ1(nsq);
610
+ READ1(scale_nbit);
611
+ READ1(r2);
612
+ IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2);
613
+ read_index_header(idxl, f);
614
+ READVECTOR(idxl->trained);
493
615
  idx = idxl;
494
- } else if(h == fourcc ("IvSQ")) { // legacy
495
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
496
- std::vector<std::vector<Index::idx_t> > ids;
497
- read_ivf_header (ivsc, f, &ids);
498
- read_ScalarQuantizer (&ivsc->sq, f);
499
- READ1 (ivsc->code_size);
500
- ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
501
- for(int i = 0; i < ivsc->nlist; i++)
502
- READVECTOR (ail->codes[i]);
616
+ } else if (h == fourcc("IvSQ")) { // legacy
617
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
618
+ std::vector<std::vector<Index::idx_t>> ids;
619
+ read_ivf_header(ivsc, f, &ids);
620
+ read_ScalarQuantizer(&ivsc->sq, f);
621
+ READ1(ivsc->code_size);
622
+ ArrayInvertedLists* ail = set_array_invlist(ivsc, ids);
623
+ for (int i = 0; i < ivsc->nlist; i++)
624
+ READVECTOR(ail->codes[i]);
503
625
  idx = ivsc;
504
- } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
505
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
506
- read_ivf_header (ivsc, f);
507
- read_ScalarQuantizer (&ivsc->sq, f);
508
- READ1 (ivsc->code_size);
509
- if (h == fourcc ("IwSQ")) {
626
+ } else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) {
627
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
628
+ read_ivf_header(ivsc, f);
629
+ read_ScalarQuantizer(&ivsc->sq, f);
630
+ READ1(ivsc->code_size);
631
+ if (h == fourcc("IwSQ")) {
510
632
  ivsc->by_residual = true;
511
633
  } else {
512
- READ1 (ivsc->by_residual);
634
+ READ1(ivsc->by_residual);
513
635
  }
514
- read_InvertedLists (ivsc, f, io_flags);
636
+ read_InvertedLists(ivsc, f, io_flags);
515
637
  idx = ivsc;
516
- } else if(h == fourcc ("IwSh")) {
517
- IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
518
- read_ivf_header (ivsp, f);
519
- ivsp->vt = read_VectorTransform (f);
638
+ } else if (h == fourcc("IwLS") || h == fourcc("IwRQ")) {
639
+ bool is_LSQ = h == fourcc("IwLS");
640
+ IndexIVFAdditiveQuantizer* iva;
641
+ if (is_LSQ) {
642
+ iva = new IndexIVFLocalSearchQuantizer();
643
+ } else {
644
+ iva = new IndexIVFResidualQuantizer();
645
+ }
646
+ read_ivf_header(iva, f);
647
+ READ1(iva->code_size);
648
+ if (is_LSQ) {
649
+ read_LocalSearchQuantizer((LocalSearchQuantizer*)iva->aq, f);
650
+ } else {
651
+ read_ResidualQuantizer((ResidualQuantizer*)iva->aq, f);
652
+ }
653
+ READ1(iva->by_residual);
654
+ READ1(iva->use_precomputed_table);
655
+ read_InvertedLists(iva, f, io_flags);
656
+ idx = iva;
657
+ } else if (h == fourcc("IwSh")) {
658
+ IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash();
659
+ read_ivf_header(ivsp, f);
660
+ ivsp->vt = read_VectorTransform(f);
520
661
  ivsp->own_fields = true;
521
- READ1 (ivsp->nbit);
662
+ READ1(ivsp->nbit);
522
663
  // not stored by write_ivf_header
523
664
  ivsp->code_size = (ivsp->nbit + 7) / 8;
524
- READ1 (ivsp->period);
525
- READ1 (ivsp->threshold_type);
526
- READVECTOR (ivsp->trained);
527
- read_InvertedLists (ivsp, f, io_flags);
665
+ READ1(ivsp->period);
666
+ READ1(ivsp->threshold_type);
667
+ READVECTOR(ivsp->trained);
668
+ read_InvertedLists(ivsp, f, io_flags);
528
669
  idx = ivsp;
529
- } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
530
- h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {
670
+ } else if (
671
+ h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
672
+ h == fourcc("IwQR")) {
673
+ idx = read_ivfpq(f, h, io_flags);
531
674
 
532
- idx = read_ivfpq (f, h, io_flags);
533
-
534
- } else if(h == fourcc ("IxPT")) {
535
- IndexPreTransform * ixpt = new IndexPreTransform();
675
+ } else if (h == fourcc("IxPT")) {
676
+ IndexPreTransform* ixpt = new IndexPreTransform();
536
677
  ixpt->own_fields = true;
537
- read_index_header (ixpt, f);
678
+ read_index_header(ixpt, f);
538
679
  int nt;
539
680
  if (read_old_fmt_hack == 2) {
540
681
  nt = 1;
541
682
  } else {
542
- READ1 (nt);
683
+ READ1(nt);
543
684
  }
544
685
  for (int i = 0; i < nt; i++) {
545
- ixpt->chain.push_back (read_VectorTransform (f));
686
+ ixpt->chain.push_back(read_VectorTransform(f));
546
687
  }
547
- ixpt->index = read_index (f, io_flags);
688
+ ixpt->index = read_index(f, io_flags);
548
689
  idx = ixpt;
549
- } else if(h == fourcc ("Imiq")) {
550
- MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
551
- read_index_header (imiq, f);
552
- read_ProductQuantizer (&imiq->pq, f);
690
+ } else if (h == fourcc("Imiq")) {
691
+ MultiIndexQuantizer* imiq = new MultiIndexQuantizer();
692
+ read_index_header(imiq, f);
693
+ read_ProductQuantizer(&imiq->pq, f);
553
694
  idx = imiq;
554
- } else if(h == fourcc ("IxRF")) {
555
- IndexRefine *idxrf = new IndexRefine ();
556
- read_index_header (idxrf, f);
695
+ } else if (h == fourcc("IxRF")) {
696
+ IndexRefine* idxrf = new IndexRefine();
697
+ read_index_header(idxrf, f);
557
698
  idxrf->base_index = read_index(f, io_flags);
558
699
  idxrf->refine_index = read_index(f, io_flags);
559
- READ1 (idxrf->k_factor);
700
+ READ1(idxrf->k_factor);
560
701
  if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
561
702
  // then make a RefineFlat with it
562
- IndexRefine *idxrf_old = idxrf;
703
+ IndexRefine* idxrf_old = idxrf;
563
704
  idxrf = new IndexRefineFlat();
564
705
  *idxrf = *idxrf_old;
565
706
  delete idxrf_old;
@@ -567,248 +708,260 @@ Index *read_index (IOReader *f, int io_flags) {
567
708
  idxrf->own_fields = true;
568
709
  idxrf->own_refine_index = true;
569
710
  idx = idxrf;
570
- } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
571
- bool is_map2 = h == fourcc ("IxM2");
572
- IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
573
- read_index_header (idxmap, f);
574
- idxmap->index = read_index (f, io_flags);
711
+ } else if (h == fourcc("IxMp") || h == fourcc("IxM2")) {
712
+ bool is_map2 = h == fourcc("IxM2");
713
+ IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap();
714
+ read_index_header(idxmap, f);
715
+ idxmap->index = read_index(f, io_flags);
575
716
  idxmap->own_fields = true;
576
- READVECTOR (idxmap->id_map);
717
+ READVECTOR(idxmap->id_map);
577
718
  if (is_map2) {
578
- static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
719
+ static_cast<IndexIDMap2*>(idxmap)->construct_rev_map();
579
720
  }
580
721
  idx = idxmap;
581
- } else if (h == fourcc ("Ix2L")) {
582
- Index2Layer * idxp = new Index2Layer ();
583
- read_index_header (idxp, f);
584
- idxp->q1.quantizer = read_index (f, io_flags);
585
- READ1 (idxp->q1.nlist);
586
- READ1 (idxp->q1.quantizer_trains_alone);
587
- read_ProductQuantizer (&idxp->pq, f);
588
- READ1 (idxp->code_size_1);
589
- READ1 (idxp->code_size_2);
590
- READ1 (idxp->code_size);
591
- READVECTOR (idxp->codes);
722
+ } else if (h == fourcc("Ix2L")) {
723
+ Index2Layer* idxp = new Index2Layer();
724
+ read_index_header(idxp, f);
725
+ idxp->q1.quantizer = read_index(f, io_flags);
726
+ READ1(idxp->q1.nlist);
727
+ READ1(idxp->q1.quantizer_trains_alone);
728
+ read_ProductQuantizer(&idxp->pq, f);
729
+ READ1(idxp->code_size_1);
730
+ READ1(idxp->code_size_2);
731
+ READ1(idxp->code_size);
732
+ READVECTOR(idxp->codes);
592
733
  idx = idxp;
593
- } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
594
- h == fourcc("IHNs") || h == fourcc("IHN2")) {
595
- IndexHNSW *idxhnsw = nullptr;
596
- if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
597
- if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
598
- if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
599
- if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
600
- read_index_header (idxhnsw, f);
601
- read_HNSW (&idxhnsw->hnsw, f);
602
- idxhnsw->storage = read_index (f, io_flags);
734
+ } else if (
735
+ h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
736
+ h == fourcc("IHN2")) {
737
+ IndexHNSW* idxhnsw = nullptr;
738
+ if (h == fourcc("IHNf"))
739
+ idxhnsw = new IndexHNSWFlat();
740
+ if (h == fourcc("IHNp"))
741
+ idxhnsw = new IndexHNSWPQ();
742
+ if (h == fourcc("IHNs"))
743
+ idxhnsw = new IndexHNSWSQ();
744
+ if (h == fourcc("IHN2"))
745
+ idxhnsw = new IndexHNSW2Level();
746
+ read_index_header(idxhnsw, f);
747
+ read_HNSW(&idxhnsw->hnsw, f);
748
+ idxhnsw->storage = read_index(f, io_flags);
603
749
  idxhnsw->own_fields = true;
604
750
  if (h == fourcc("IHNp")) {
605
- dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
751
+ dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
606
752
  }
607
753
  idx = idxhnsw;
608
- } else if(h == fourcc("IPfs")) {
609
- IndexPQFastScan *idxpqfs = new IndexPQFastScan();
610
- read_index_header (idxpqfs, f);
611
- read_ProductQuantizer (&idxpqfs->pq, f);
612
- READ1 (idxpqfs->implem);
613
- READ1 (idxpqfs->bbs);
614
- READ1 (idxpqfs->qbs);
615
- READ1 (idxpqfs->ntotal2);
616
- READ1 (idxpqfs->M2);
617
- READVECTOR (idxpqfs->codes);
754
+ } else if (h == fourcc("INSf")) {
755
+ IndexNSG* idxnsg = new IndexNSGFlat();
756
+ read_index_header(idxnsg, f);
757
+ READ1(idxnsg->GK);
758
+ READ1(idxnsg->build_type);
759
+ READ1(idxnsg->nndescent_S);
760
+ READ1(idxnsg->nndescent_R);
761
+ READ1(idxnsg->nndescent_L);
762
+ READ1(idxnsg->nndescent_iter);
763
+ read_NSG(&idxnsg->nsg, f);
764
+ idxnsg->storage = read_index(f, io_flags);
765
+ idxnsg->own_fields = true;
766
+ idx = idxnsg;
767
+ } else if (h == fourcc("IPfs")) {
768
+ IndexPQFastScan* idxpqfs = new IndexPQFastScan();
769
+ read_index_header(idxpqfs, f);
770
+ read_ProductQuantizer(&idxpqfs->pq, f);
771
+ READ1(idxpqfs->implem);
772
+ READ1(idxpqfs->bbs);
773
+ READ1(idxpqfs->qbs);
774
+ READ1(idxpqfs->ntotal2);
775
+ READ1(idxpqfs->M2);
776
+ READVECTOR(idxpqfs->codes);
618
777
  idx = idxpqfs;
619
778
 
620
779
  } else if (h == fourcc("IwPf")) {
621
- IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
622
- read_ivf_header (ivpq, f);
623
- READ1 (ivpq->by_residual);
624
- READ1 (ivpq->code_size);
625
- READ1 (ivpq->bbs);
626
- READ1 (ivpq->M2);
627
- READ1 (ivpq->implem);
628
- READ1 (ivpq->qbs2);
629
- read_ProductQuantizer (&ivpq->pq, f);
630
- read_InvertedLists (ivpq, f, io_flags);
780
+ IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan();
781
+ read_ivf_header(ivpq, f);
782
+ READ1(ivpq->by_residual);
783
+ READ1(ivpq->code_size);
784
+ READ1(ivpq->bbs);
785
+ READ1(ivpq->M2);
786
+ READ1(ivpq->implem);
787
+ READ1(ivpq->qbs2);
788
+ read_ProductQuantizer(&ivpq->pq, f);
789
+ read_InvertedLists(ivpq, f, io_flags);
631
790
  ivpq->precompute_table();
632
791
  idx = ivpq;
633
792
  } else {
634
793
  FAISS_THROW_FMT(
635
- "Index type 0x%08x (\"%s\") not recognized",
636
- h, fourcc_inv_printable(h).c_str()
637
- );
794
+ "Index type 0x%08x (\"%s\") not recognized",
795
+ h,
796
+ fourcc_inv_printable(h).c_str());
638
797
  idx = nullptr;
639
798
  }
640
799
  return idx;
641
800
  }
642
801
 
643
-
644
- Index *read_index (FILE * f, int io_flags) {
802
+ Index* read_index(FILE* f, int io_flags) {
645
803
  FileIOReader reader(f);
646
804
  return read_index(&reader, io_flags);
647
805
  }
648
806
 
649
- Index *read_index (const char *fname, int io_flags) {
807
+ Index* read_index(const char* fname, int io_flags) {
650
808
  FileIOReader reader(fname);
651
- Index *idx = read_index (&reader, io_flags);
809
+ Index* idx = read_index(&reader, io_flags);
652
810
  return idx;
653
811
  }
654
812
 
655
- VectorTransform *read_VectorTransform (const char *fname) {
813
+ VectorTransform* read_VectorTransform(const char* fname) {
656
814
  FileIOReader reader(fname);
657
- VectorTransform *vt = read_VectorTransform (&reader);
815
+ VectorTransform* vt = read_VectorTransform(&reader);
658
816
  return vt;
659
817
  }
660
818
 
661
-
662
-
663
819
  /*************************************************************
664
820
  * Read binary indexes
665
821
  **************************************************************/
666
822
 
667
- static void read_InvertedLists (
668
- IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
669
- InvertedLists *ils = read_InvertedLists (f, io_flags);
670
- FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
671
- ils->code_size == ivf->code_size));
823
+ static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) {
824
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
825
+ FAISS_THROW_IF_NOT(
826
+ !ils ||
827
+ (ils->nlist == ivf->nlist && ils->code_size == ivf->code_size));
672
828
  ivf->invlists = ils;
673
829
  ivf->own_invlists = true;
674
830
  }
675
831
 
676
-
677
-
678
- static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
679
- READ1 (idx->d);
680
- READ1 (idx->code_size);
681
- READ1 (idx->ntotal);
682
- READ1 (idx->is_trained);
683
- READ1 (idx->metric_type);
832
+ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
833
+ READ1(idx->d);
834
+ READ1(idx->code_size);
835
+ READ1(idx->ntotal);
836
+ READ1(idx->is_trained);
837
+ READ1(idx->metric_type);
684
838
  idx->verbose = false;
685
839
  }
686
840
 
687
- static void read_binary_ivf_header (
688
- IndexBinaryIVF *ivf, IOReader *f,
689
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
690
- {
691
- read_index_binary_header (ivf, f);
692
- READ1 (ivf->nlist);
693
- READ1 (ivf->nprobe);
694
- ivf->quantizer = read_index_binary (f);
841
+ static void read_binary_ivf_header(
842
+ IndexBinaryIVF* ivf,
843
+ IOReader* f,
844
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
845
+ read_index_binary_header(ivf, f);
846
+ READ1(ivf->nlist);
847
+ READ1(ivf->nprobe);
848
+ ivf->quantizer = read_index_binary(f);
695
849
  ivf->own_fields = true;
696
850
  if (ids) { // used in legacy "Iv" formats
697
- ids->resize (ivf->nlist);
851
+ ids->resize(ivf->nlist);
698
852
  for (size_t i = 0; i < ivf->nlist; i++)
699
- READVECTOR ((*ids)[i]);
853
+ READVECTOR((*ids)[i]);
700
854
  }
701
- read_direct_map (&ivf->direct_map, f);
855
+ read_direct_map(&ivf->direct_map, f);
702
856
  }
703
857
 
704
- static void read_binary_hash_invlists (
705
- IndexBinaryHash::InvertedListMap &invlists,
706
- int b, IOReader *f)
707
- {
858
+ static void read_binary_hash_invlists(
859
+ IndexBinaryHash::InvertedListMap& invlists,
860
+ int b,
861
+ IOReader* f) {
708
862
  size_t sz;
709
- READ1 (sz);
863
+ READ1(sz);
710
864
  int il_nbit = 0;
711
- READ1 (il_nbit);
865
+ READ1(il_nbit);
712
866
  // buffer for bitstrings
713
867
  std::vector<uint8_t> buf((b + il_nbit) * sz);
714
- READVECTOR (buf);
715
- BitstringReader rd (buf.data(), buf.size());
716
- invlists.reserve (sz);
868
+ READVECTOR(buf);
869
+ BitstringReader rd(buf.data(), buf.size());
870
+ invlists.reserve(sz);
717
871
  for (size_t i = 0; i < sz; i++) {
718
872
  uint64_t hash = rd.read(b);
719
873
  uint64_t ilsz = rd.read(il_nbit);
720
- auto & il = invlists[hash];
721
- READVECTOR (il.ids);
722
- FAISS_THROW_IF_NOT (il.ids.size() == ilsz);
723
- READVECTOR (il.vecs);
874
+ auto& il = invlists[hash];
875
+ READVECTOR(il.ids);
876
+ FAISS_THROW_IF_NOT(il.ids.size() == ilsz);
877
+ READVECTOR(il.vecs);
724
878
  }
725
879
  }
726
880
 
727
881
  static void read_binary_multi_hash_map(
728
- IndexBinaryMultiHash::Map &map,
729
- int b, size_t ntotal,
730
- IOReader *f)
731
- {
882
+ IndexBinaryMultiHash::Map& map,
883
+ int b,
884
+ size_t ntotal,
885
+ IOReader* f) {
732
886
  int id_bits;
733
887
  size_t sz;
734
- READ1 (id_bits);
735
- READ1 (sz);
888
+ READ1(id_bits);
889
+ READ1(sz);
736
890
  std::vector<uint8_t> buf;
737
- READVECTOR (buf);
891
+ READVECTOR(buf);
738
892
  size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
739
- FAISS_THROW_IF_NOT (buf.size() == (nbit + 7) / 8);
740
- BitstringReader rd (buf.data(), buf.size());
741
- map.reserve (sz);
893
+ FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8);
894
+ BitstringReader rd(buf.data(), buf.size());
895
+ map.reserve(sz);
742
896
  for (size_t i = 0; i < sz; i++) {
743
897
  uint64_t hash = rd.read(b);
744
898
  uint64_t ilsz = rd.read(id_bits);
745
- auto & il = map[hash];
899
+ auto& il = map[hash];
746
900
  for (size_t j = 0; j < ilsz; j++) {
747
- il.push_back (rd.read (id_bits));
901
+ il.push_back(rd.read(id_bits));
748
902
  }
749
903
  }
750
904
  }
751
905
 
752
-
753
-
754
- IndexBinary *read_index_binary (IOReader *f, int io_flags) {
755
- IndexBinary * idx = nullptr;
906
+ IndexBinary* read_index_binary(IOReader* f, int io_flags) {
907
+ IndexBinary* idx = nullptr;
756
908
  uint32_t h;
757
- READ1 (h);
758
- if (h == fourcc ("IBxF")) {
759
- IndexBinaryFlat *idxf = new IndexBinaryFlat ();
760
- read_index_binary_header (idxf, f);
761
- READVECTOR (idxf->xb);
762
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
909
+ READ1(h);
910
+ if (h == fourcc("IBxF")) {
911
+ IndexBinaryFlat* idxf = new IndexBinaryFlat();
912
+ read_index_binary_header(idxf, f);
913
+ READVECTOR(idxf->xb);
914
+ FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size);
763
915
  // leak!
764
916
  idx = idxf;
765
- } else if (h == fourcc ("IBwF")) {
766
- IndexBinaryIVF *ivf = new IndexBinaryIVF ();
767
- read_binary_ivf_header (ivf, f);
768
- read_InvertedLists (ivf, f, io_flags);
917
+ } else if (h == fourcc("IBwF")) {
918
+ IndexBinaryIVF* ivf = new IndexBinaryIVF();
919
+ read_binary_ivf_header(ivf, f);
920
+ read_InvertedLists(ivf, f, io_flags);
769
921
  idx = ivf;
770
- } else if (h == fourcc ("IBFf")) {
771
- IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat ();
772
- read_index_binary_header (idxff, f);
922
+ } else if (h == fourcc("IBFf")) {
923
+ IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat();
924
+ read_index_binary_header(idxff, f);
773
925
  idxff->own_fields = true;
774
- idxff->index = read_index (f, io_flags);
926
+ idxff->index = read_index(f, io_flags);
775
927
  idx = idxff;
776
- } else if (h == fourcc ("IBHf")) {
777
- IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW ();
778
- read_index_binary_header (idxhnsw, f);
779
- read_HNSW (&idxhnsw->hnsw, f);
780
- idxhnsw->storage = read_index_binary (f, io_flags);
928
+ } else if (h == fourcc("IBHf")) {
929
+ IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW();
930
+ read_index_binary_header(idxhnsw, f);
931
+ read_HNSW(&idxhnsw->hnsw, f);
932
+ idxhnsw->storage = read_index_binary(f, io_flags);
781
933
  idxhnsw->own_fields = true;
782
934
  idx = idxhnsw;
783
- } else if(h == fourcc ("IBMp") || h == fourcc ("IBM2")) {
784
- bool is_map2 = h == fourcc ("IBM2");
785
- IndexBinaryIDMap * idxmap = is_map2 ?
786
- new IndexBinaryIDMap2 () : new IndexBinaryIDMap ();
787
- read_index_binary_header (idxmap, f);
788
- idxmap->index = read_index_binary (f, io_flags);
935
+ } else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
936
+ bool is_map2 = h == fourcc("IBM2");
937
+ IndexBinaryIDMap* idxmap =
938
+ is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap();
939
+ read_index_binary_header(idxmap, f);
940
+ idxmap->index = read_index_binary(f, io_flags);
789
941
  idxmap->own_fields = true;
790
- READVECTOR (idxmap->id_map);
942
+ READVECTOR(idxmap->id_map);
791
943
  if (is_map2) {
792
- static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map ();
944
+ static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map();
793
945
  }
794
946
  idx = idxmap;
795
- } else if(h == fourcc("IBHh")) {
796
- IndexBinaryHash *idxh = new IndexBinaryHash ();
797
- read_index_binary_header (idxh, f);
798
- READ1 (idxh->b);
799
- READ1 (idxh->nflip);
947
+ } else if (h == fourcc("IBHh")) {
948
+ IndexBinaryHash* idxh = new IndexBinaryHash();
949
+ read_index_binary_header(idxh, f);
950
+ READ1(idxh->b);
951
+ READ1(idxh->nflip);
800
952
  read_binary_hash_invlists(idxh->invlists, idxh->b, f);
801
953
  idx = idxh;
802
- } else if(h == fourcc("IBHm")) {
803
- IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash ();
804
- read_index_binary_header (idxmh, f);
805
- idxmh->storage = dynamic_cast<IndexBinaryFlat*> (read_index_binary (f));
806
- FAISS_THROW_IF_NOT(idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
954
+ } else if (h == fourcc("IBHm")) {
955
+ IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash();
956
+ read_index_binary_header(idxmh, f);
957
+ idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f));
958
+ FAISS_THROW_IF_NOT(
959
+ idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
807
960
  idxmh->own_fields = true;
808
- READ1 (idxmh->b);
809
- READ1 (idxmh->nhash);
810
- READ1 (idxmh->nflip);
811
- idxmh->maps.resize (idxmh->nhash);
961
+ READ1(idxmh->b);
962
+ READ1(idxmh->nhash);
963
+ READ1(idxmh->nflip);
964
+ idxmh->maps.resize(idxmh->nhash);
812
965
  for (int i = 0; i < idxmh->nhash; i++) {
813
966
  read_binary_multi_hash_map(
814
967
  idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
@@ -816,25 +969,23 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
816
969
  idx = idxmh;
817
970
  } else {
818
971
  FAISS_THROW_FMT(
819
- "Index type %08x (\"%s\") not recognized",
820
- h, fourcc_inv_printable(h).c_str()
821
- );
972
+ "Index type %08x (\"%s\") not recognized",
973
+ h,
974
+ fourcc_inv_printable(h).c_str());
822
975
  idx = nullptr;
823
976
  }
824
977
  return idx;
825
978
  }
826
979
 
827
- IndexBinary *read_index_binary (FILE * f, int io_flags) {
980
+ IndexBinary* read_index_binary(FILE* f, int io_flags) {
828
981
  FileIOReader reader(f);
829
982
  return read_index_binary(&reader, io_flags);
830
983
  }
831
984
 
832
- IndexBinary *read_index_binary (const char *fname, int io_flags) {
985
+ IndexBinary* read_index_binary(const char* fname, int io_flags) {
833
986
  FileIOReader reader(fname);
834
- IndexBinary *idx = read_index_binary (&reader, io_flags);
987
+ IndexBinary* idx = read_index_binary(&reader, io_flags);
835
988
  return idx;
836
989
  }
837
990
 
838
-
839
-
840
991
  } // namespace faiss