faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -9,11 +9,13 @@
9
9
 
10
10
  #include <faiss/index_io.h>
11
11
 
12
+ #include <faiss/impl/io_macros.h>
13
+
12
14
  #include <cstdio>
13
15
  #include <cstdlib>
14
16
 
15
- #include <sys/types.h>
16
17
  #include <sys/stat.h>
18
+ #include <sys/types.h>
17
19
 
18
20
  #include <faiss/impl/FaissAssert.h>
19
21
  #include <faiss/impl/io.h>
@@ -22,343 +24,434 @@
22
24
 
23
25
  #include <faiss/invlists/InvertedListsIOHook.h>
24
26
 
27
+ #include <faiss/Index2Layer.h>
28
+ #include <faiss/IndexAdditiveQuantizer.h>
25
29
  #include <faiss/IndexFlat.h>
26
- #include <faiss/VectorTransform.h>
27
- #include <faiss/IndexPreTransform.h>
28
- #include <faiss/IndexLSH.h>
29
- #include <faiss/IndexPQ.h>
30
+ #include <faiss/IndexHNSW.h>
30
31
  #include <faiss/IndexIVF.h>
32
+ #include <faiss/IndexIVFAdditiveQuantizer.h>
33
+ #include <faiss/IndexIVFFlat.h>
31
34
  #include <faiss/IndexIVFPQ.h>
35
+ #include <faiss/IndexIVFPQFastScan.h>
32
36
  #include <faiss/IndexIVFPQR.h>
33
- #include <faiss/Index2Layer.h>
34
- #include <faiss/IndexIVFFlat.h>
35
37
  #include <faiss/IndexIVFSpectralHash.h>
36
- #include <faiss/MetaIndexes.h>
37
- #include <faiss/IndexScalarQuantizer.h>
38
- #include <faiss/IndexHNSW.h>
38
+ #include <faiss/IndexLSH.h>
39
39
  #include <faiss/IndexLattice.h>
40
+ #include <faiss/IndexNSG.h>
41
+ #include <faiss/IndexPQ.h>
40
42
  #include <faiss/IndexPQFastScan.h>
41
- #include <faiss/IndexIVFPQFastScan.h>
43
+ #include <faiss/IndexPreTransform.h>
42
44
  #include <faiss/IndexRefine.h>
45
+ #include <faiss/IndexScalarQuantizer.h>
46
+ #include <faiss/MetaIndexes.h>
47
+ #include <faiss/VectorTransform.h>
43
48
 
44
49
  #include <faiss/IndexBinaryFlat.h>
45
50
  #include <faiss/IndexBinaryFromFloat.h>
46
51
  #include <faiss/IndexBinaryHNSW.h>
47
- #include <faiss/IndexBinaryIVF.h>
48
52
  #include <faiss/IndexBinaryHash.h>
53
+ #include <faiss/IndexBinaryIVF.h>
49
54
 
50
55
  namespace faiss {
51
56
 
52
-
53
57
  /*************************************************************
54
58
  * Read
55
59
  **************************************************************/
56
60
 
57
- static void read_index_header (Index *idx, IOReader *f) {
58
- READ1 (idx->d);
59
- READ1 (idx->ntotal);
61
+ static void read_index_header(Index* idx, IOReader* f) {
62
+ READ1(idx->d);
63
+ READ1(idx->ntotal);
60
64
  Index::idx_t dummy;
61
- READ1 (dummy);
62
- READ1 (dummy);
63
- READ1 (idx->is_trained);
64
- READ1 (idx->metric_type);
65
+ READ1(dummy);
66
+ READ1(dummy);
67
+ READ1(idx->is_trained);
68
+ READ1(idx->metric_type);
65
69
  if (idx->metric_type > 1) {
66
- READ1 (idx->metric_arg);
70
+ READ1(idx->metric_arg);
67
71
  }
68
72
  idx->verbose = false;
69
73
  }
70
74
 
71
- VectorTransform* read_VectorTransform (IOReader *f) {
75
+ VectorTransform* read_VectorTransform(IOReader* f) {
72
76
  uint32_t h;
73
- READ1 (h);
74
- VectorTransform *vt = nullptr;
75
-
76
- if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
77
- h == fourcc ("LTra") || h == fourcc ("PcAm") ||
78
- h == fourcc ("Viqm")) {
79
- LinearTransform *lt = nullptr;
80
- if (h == fourcc ("rrot")) {
81
- lt = new RandomRotationMatrix ();
82
- } else if (h == fourcc ("PCAm") ||
83
- h == fourcc ("PcAm")) {
84
- PCAMatrix * pca = new PCAMatrix ();
85
- READ1 (pca->eigen_power);
86
- READ1 (pca->random_rotation);
87
- if (h == fourcc ("PcAm"))
88
- READ1 (pca->balanced_bins);
89
- READVECTOR (pca->mean);
90
- READVECTOR (pca->eigenvalues);
91
- READVECTOR (pca->PCAMat);
77
+ READ1(h);
78
+ VectorTransform* vt = nullptr;
79
+
80
+ if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
81
+ h == fourcc("PcAm") || h == fourcc("Viqm") || h == fourcc("Pcam")) {
82
+ LinearTransform* lt = nullptr;
83
+ if (h == fourcc("rrot")) {
84
+ lt = new RandomRotationMatrix();
85
+ } else if (
86
+ h == fourcc("PCAm") || h == fourcc("PcAm") ||
87
+ h == fourcc("Pcam")) {
88
+ PCAMatrix* pca = new PCAMatrix();
89
+ READ1(pca->eigen_power);
90
+ if (h == fourcc("Pcam")) {
91
+ READ1(pca->epsilon);
92
+ }
93
+ READ1(pca->random_rotation);
94
+ if (h != fourcc("PCAm")) {
95
+ READ1(pca->balanced_bins);
96
+ }
97
+ READVECTOR(pca->mean);
98
+ READVECTOR(pca->eigenvalues);
99
+ READVECTOR(pca->PCAMat);
92
100
  lt = pca;
93
- } else if (h == fourcc ("Viqm")) {
94
- ITQMatrix *itqm = new ITQMatrix ();
95
- READ1 (itqm->max_iter);
96
- READ1 (itqm->seed);
101
+ } else if (h == fourcc("Viqm")) {
102
+ ITQMatrix* itqm = new ITQMatrix();
103
+ READ1(itqm->max_iter);
104
+ READ1(itqm->seed);
97
105
  lt = itqm;
98
- } else if (h == fourcc ("LTra")) {
99
- lt = new LinearTransform ();
106
+ } else if (h == fourcc("LTra")) {
107
+ lt = new LinearTransform();
100
108
  }
101
- READ1 (lt->have_bias);
102
- READVECTOR (lt->A);
103
- READVECTOR (lt->b);
104
- FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
105
- FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
109
+ READ1(lt->have_bias);
110
+ READVECTOR(lt->A);
111
+ READVECTOR(lt->b);
112
+ FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out);
113
+ FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out);
106
114
  lt->set_is_orthonormal();
107
115
  vt = lt;
108
- } else if (h == fourcc ("RmDT")) {
109
- RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
110
- READVECTOR (rdt->map);
116
+ } else if (h == fourcc("RmDT")) {
117
+ RemapDimensionsTransform* rdt = new RemapDimensionsTransform();
118
+ READVECTOR(rdt->map);
111
119
  vt = rdt;
112
- } else if (h == fourcc ("VNrm")) {
113
- NormalizationTransform *nt = new NormalizationTransform ();
114
- READ1 (nt->norm);
120
+ } else if (h == fourcc("VNrm")) {
121
+ NormalizationTransform* nt = new NormalizationTransform();
122
+ READ1(nt->norm);
115
123
  vt = nt;
116
- } else if (h == fourcc ("VCnt")) {
117
- CenteringTransform *ct = new CenteringTransform ();
118
- READVECTOR (ct->mean);
124
+ } else if (h == fourcc("VCnt")) {
125
+ CenteringTransform* ct = new CenteringTransform();
126
+ READVECTOR(ct->mean);
119
127
  vt = ct;
120
- } else if (h == fourcc ("Viqt")) {
121
- ITQTransform *itqt = new ITQTransform ();
128
+ } else if (h == fourcc("Viqt")) {
129
+ ITQTransform* itqt = new ITQTransform();
122
130
 
123
- READVECTOR (itqt->mean);
124
- READ1 (itqt->do_pca);
131
+ READVECTOR(itqt->mean);
132
+ READ1(itqt->do_pca);
125
133
  {
126
- ITQMatrix *itqm = dynamic_cast<ITQMatrix*>
127
- (read_VectorTransform (f));
134
+ ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f));
128
135
  FAISS_THROW_IF_NOT(itqm);
129
136
  itqt->itq = *itqm;
130
137
  delete itqm;
131
138
  }
132
139
  {
133
- LinearTransform *pi = dynamic_cast<LinearTransform*>
134
- (read_VectorTransform (f));
135
- FAISS_THROW_IF_NOT (pi);
140
+ LinearTransform* pi =
141
+ dynamic_cast<LinearTransform*>(read_VectorTransform(f));
142
+ FAISS_THROW_IF_NOT(pi);
136
143
  itqt->pca_then_itq = *pi;
137
144
  delete pi;
138
145
  }
139
146
  vt = itqt;
140
147
  } else {
141
148
  FAISS_THROW_FMT(
142
- "fourcc %ud (\"%s\") not recognized",
143
- h, fourcc_inv_printable(h).c_str()
144
- );
149
+ "fourcc %ud (\"%s\") not recognized in %s",
150
+ h,
151
+ fourcc_inv_printable(h).c_str(),
152
+ f->name.c_str());
145
153
  }
146
- READ1 (vt->d_in);
147
- READ1 (vt->d_out);
148
- READ1 (vt->is_trained);
154
+ READ1(vt->d_in);
155
+ READ1(vt->d_out);
156
+ READ1(vt->is_trained);
149
157
  return vt;
150
158
  }
151
159
 
152
-
153
- static void read_ArrayInvertedLists_sizes (
154
- IOReader *f, std::vector<size_t> & sizes)
155
- {
160
+ static void read_ArrayInvertedLists_sizes(
161
+ IOReader* f,
162
+ std::vector<size_t>& sizes) {
156
163
  uint32_t list_type;
157
164
  READ1(list_type);
158
165
  if (list_type == fourcc("full")) {
159
166
  size_t os = sizes.size();
160
- READVECTOR (sizes);
161
- FAISS_THROW_IF_NOT (os == sizes.size());
167
+ READVECTOR(sizes);
168
+ FAISS_THROW_IF_NOT(os == sizes.size());
162
169
  } else if (list_type == fourcc("sprs")) {
163
170
  std::vector<size_t> idsizes;
164
- READVECTOR (idsizes);
171
+ READVECTOR(idsizes);
165
172
  for (size_t j = 0; j < idsizes.size(); j += 2) {
166
- FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
173
+ FAISS_THROW_IF_NOT(idsizes[j] < sizes.size());
167
174
  sizes[idsizes[j]] = idsizes[j + 1];
168
175
  }
169
176
  } else {
170
177
  FAISS_THROW_FMT(
171
- "list_type %ud (\"%s\") not recognized",
172
- list_type, fourcc_inv_printable(list_type).c_str()
173
- );
178
+ "list_type %ud (\"%s\") not recognized",
179
+ list_type,
180
+ fourcc_inv_printable(list_type).c_str());
174
181
  }
175
182
  }
176
183
 
177
- InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
184
+ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
178
185
  uint32_t h;
179
- READ1 (h);
180
- if (h == fourcc ("il00")) {
181
- fprintf(stderr, "read_InvertedLists:"
186
+ READ1(h);
187
+ if (h == fourcc("il00")) {
188
+ fprintf(stderr,
189
+ "read_InvertedLists:"
182
190
  " WARN! inverted lists not stored with IVF object\n");
183
191
  return nullptr;
184
- } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
185
- auto ails = new ArrayInvertedLists (0, 0);
186
- READ1 (ails->nlist);
187
- READ1 (ails->code_size);
188
- ails->ids.resize (ails->nlist);
189
- ails->codes.resize (ails->nlist);
190
- std::vector<size_t> sizes (ails->nlist);
191
- read_ArrayInvertedLists_sizes (f, sizes);
192
+ } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
193
+ auto ails = new ArrayInvertedLists(0, 0);
194
+ READ1(ails->nlist);
195
+ READ1(ails->code_size);
196
+ ails->ids.resize(ails->nlist);
197
+ ails->codes.resize(ails->nlist);
198
+ std::vector<size_t> sizes(ails->nlist);
199
+ read_ArrayInvertedLists_sizes(f, sizes);
192
200
  for (size_t i = 0; i < ails->nlist; i++) {
193
- ails->ids[i].resize (sizes[i]);
194
- ails->codes[i].resize (sizes[i] * ails->code_size);
201
+ ails->ids[i].resize(sizes[i]);
202
+ ails->codes[i].resize(sizes[i] * ails->code_size);
195
203
  }
196
204
  for (size_t i = 0; i < ails->nlist; i++) {
197
205
  size_t n = ails->ids[i].size();
198
206
  if (n > 0) {
199
- READANDCHECK (ails->codes[i].data(), n * ails->code_size);
200
- READANDCHECK (ails->ids[i].data(), n);
207
+ READANDCHECK(ails->codes[i].data(), n * ails->code_size);
208
+ READANDCHECK(ails->ids[i].data(), n);
201
209
  }
202
210
  }
203
211
  return ails;
204
212
 
205
- } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
206
- // code is always ilxx where xx is specific to the type of invlists we want
207
- // so we get the 16 high bits from the io_flag and the 16 low bits as "il"
213
+ } else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
214
+ // code is always ilxx where xx is specific to the type of invlists we
215
+ // want so we get the 16 high bits from the io_flag and the 16 low bits
216
+ // as "il"
208
217
  int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
209
218
  size_t nlist, code_size;
210
- READ1 (nlist);
211
- READ1 (code_size);
212
- std::vector<size_t> sizes (nlist);
213
- read_ArrayInvertedLists_sizes (f, sizes);
219
+ READ1(nlist);
220
+ READ1(code_size);
221
+ std::vector<size_t> sizes(nlist);
222
+ read_ArrayInvertedLists_sizes(f, sizes);
214
223
  return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
215
224
  f, io_flags, nlist, code_size, sizes);
216
225
  } else {
217
226
  return InvertedListsIOHook::lookup(h)->read(f, io_flags);
218
227
  }
219
-
220
228
  }
221
229
 
222
-
223
- static void read_InvertedLists (
224
- IndexIVF *ivf, IOReader *f, int io_flags) {
225
- InvertedLists *ils = read_InvertedLists (f, io_flags);
230
+ static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
231
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
226
232
  if (ils) {
227
- FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
228
- FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
229
- ils->code_size == ivf->code_size);
233
+ FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
234
+ FAISS_THROW_IF_NOT(
235
+ ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
236
+ ils->code_size == ivf->code_size);
230
237
  }
231
238
  ivf->invlists = ils;
232
239
  ivf->own_invlists = true;
233
240
  }
234
241
 
235
- static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
236
- READ1 (pq->d);
237
- READ1 (pq->M);
238
- READ1 (pq->nbits);
239
- pq->set_derived_values ();
240
- READVECTOR (pq->centroids);
242
+ static void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
243
+ READ1(pq->d);
244
+ READ1(pq->M);
245
+ READ1(pq->nbits);
246
+ pq->set_derived_values();
247
+ READVECTOR(pq->centroids);
241
248
  }
242
249
 
243
- static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
244
- READ1 (ivsc->qtype);
245
- READ1 (ivsc->rangestat);
246
- READ1 (ivsc->rangestat_arg);
247
- READ1 (ivsc->d);
248
- READ1 (ivsc->code_size);
249
- READVECTOR (ivsc->trained);
250
- ivsc->set_derived_sizes ();
250
+ static void read_ResidualQuantizer_old(ResidualQuantizer* rq, IOReader* f) {
251
+ READ1(rq->d);
252
+ READ1(rq->M);
253
+ READVECTOR(rq->nbits);
254
+ READ1(rq->is_trained);
255
+ READ1(rq->train_type);
256
+ READ1(rq->max_beam_size);
257
+ READVECTOR(rq->codebooks);
258
+ READ1(rq->search_type);
259
+ READ1(rq->norm_min);
260
+ READ1(rq->norm_max);
261
+ rq->set_derived_values();
251
262
  }
252
263
 
264
+ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
265
+ READ1(aq->d);
266
+ READ1(aq->M);
267
+ READVECTOR(aq->nbits);
268
+ READ1(aq->is_trained);
269
+ READVECTOR(aq->codebooks);
270
+ READ1(aq->search_type);
271
+ READ1(aq->norm_min);
272
+ READ1(aq->norm_max);
273
+ if (aq->search_type == AdditiveQuantizer::ST_norm_cqint8 ||
274
+ aq->search_type == AdditiveQuantizer::ST_norm_cqint4) {
275
+ READXBVECTOR(aq->qnorm.codes);
276
+ }
277
+ aq->set_derived_values();
278
+ }
279
+
280
+ static void read_ResidualQuantizer(ResidualQuantizer* rq, IOReader* f) {
281
+ read_AdditiveQuantizer(rq, f);
282
+ READ1(rq->train_type);
283
+ READ1(rq->max_beam_size);
284
+ if (!(rq->train_type & ResidualQuantizer::Skip_codebook_tables)) {
285
+ rq->compute_codebook_tables();
286
+ }
287
+ }
288
+
289
+ static void read_LocalSearchQuantizer(LocalSearchQuantizer* lsq, IOReader* f) {
290
+ read_AdditiveQuantizer(lsq, f);
291
+ READ1(lsq->K);
292
+ READ1(lsq->train_iters);
293
+ READ1(lsq->encode_ils_iters);
294
+ READ1(lsq->train_ils_iters);
295
+ READ1(lsq->icm_iters);
296
+ READ1(lsq->p);
297
+ READ1(lsq->lambd);
298
+ READ1(lsq->chunk_size);
299
+ READ1(lsq->random_seed);
300
+ READ1(lsq->nperts);
301
+ READ1(lsq->update_codebooks_with_double);
302
+ }
303
+
304
+ static void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
305
+ READ1(ivsc->qtype);
306
+ READ1(ivsc->rangestat);
307
+ READ1(ivsc->rangestat_arg);
308
+ READ1(ivsc->d);
309
+ READ1(ivsc->code_size);
310
+ READVECTOR(ivsc->trained);
311
+ ivsc->set_derived_sizes();
312
+ }
253
313
 
254
- static void read_HNSW (HNSW *hnsw, IOReader *f) {
255
- READVECTOR (hnsw->assign_probas);
256
- READVECTOR (hnsw->cum_nneighbor_per_level);
257
- READVECTOR (hnsw->levels);
258
- READVECTOR (hnsw->offsets);
259
- READVECTOR (hnsw->neighbors);
314
+ static void read_HNSW(HNSW* hnsw, IOReader* f) {
315
+ READVECTOR(hnsw->assign_probas);
316
+ READVECTOR(hnsw->cum_nneighbor_per_level);
317
+ READVECTOR(hnsw->levels);
318
+ READVECTOR(hnsw->offsets);
319
+ READVECTOR(hnsw->neighbors);
320
+
321
+ READ1(hnsw->entry_point);
322
+ READ1(hnsw->max_level);
323
+ READ1(hnsw->efConstruction);
324
+ READ1(hnsw->efSearch);
325
+ READ1(hnsw->upper_beam);
326
+ }
327
+
328
+ static void read_NSG(NSG* nsg, IOReader* f) {
329
+ READ1(nsg->ntotal);
330
+ READ1(nsg->R);
331
+ READ1(nsg->L);
332
+ READ1(nsg->C);
333
+ READ1(nsg->search_L);
334
+ READ1(nsg->enterpoint);
335
+ READ1(nsg->is_built);
336
+
337
+ if (!nsg->is_built) {
338
+ return;
339
+ }
260
340
 
261
- READ1 (hnsw->entry_point);
262
- READ1 (hnsw->max_level);
263
- READ1 (hnsw->efConstruction);
264
- READ1 (hnsw->efSearch);
265
- READ1 (hnsw->upper_beam);
341
+ constexpr int EMPTY_ID = -1;
342
+ int N = nsg->ntotal;
343
+ int R = nsg->R;
344
+ auto& graph = nsg->final_graph;
345
+ graph = std::make_shared<nsg::Graph<int>>(N, R);
346
+ std::fill_n(graph->data, N * R, EMPTY_ID);
347
+
348
+ int size = 0;
349
+
350
+ for (int i = 0; i < N; i++) {
351
+ for (int j = 0; j < R + 1; j++) {
352
+ int id;
353
+ READ1(id);
354
+ if (id != EMPTY_ID) {
355
+ graph->at(i, j) = id;
356
+ size += 1;
357
+ } else {
358
+ break;
359
+ }
360
+ }
361
+ }
266
362
  }
267
363
 
268
- ProductQuantizer * read_ProductQuantizer (const char*fname) {
364
+ ProductQuantizer* read_ProductQuantizer(const char* fname) {
269
365
  FileIOReader reader(fname);
270
366
  return read_ProductQuantizer(&reader);
271
367
  }
272
368
 
273
- ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
274
- ProductQuantizer *pq = new ProductQuantizer();
275
- ScopeDeleter1<ProductQuantizer> del (pq);
369
+ ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
370
+ ProductQuantizer* pq = new ProductQuantizer();
371
+ ScopeDeleter1<ProductQuantizer> del(pq);
276
372
 
277
- read_ProductQuantizer(pq, reader);
278
- del.release ();
279
- return pq;
373
+ read_ProductQuantizer(pq, reader);
374
+ del.release();
375
+ return pq;
280
376
  }
281
377
 
282
- static void read_direct_map (DirectMap *dm, IOReader *f) {
378
+ static void read_direct_map(DirectMap* dm, IOReader* f) {
283
379
  char maintain_direct_map;
284
- READ1 (maintain_direct_map);
380
+ READ1(maintain_direct_map);
285
381
  dm->type = (DirectMap::Type)maintain_direct_map;
286
- READVECTOR (dm->array);
382
+ READVECTOR(dm->array);
287
383
  if (dm->type == DirectMap::Hashtable) {
288
384
  using idx_t = Index::idx_t;
289
385
  std::vector<std::pair<idx_t, idx_t>> v;
290
- READVECTOR (v);
291
- std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
292
- map.reserve (v.size());
293
- for (auto it: v) {
294
- map [it.first] = it.second;
386
+ READVECTOR(v);
387
+ std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
388
+ map.reserve(v.size());
389
+ for (auto it : v) {
390
+ map[it.first] = it.second;
295
391
  }
296
392
  }
297
-
298
393
  }
299
394
 
300
-
301
- static void read_ivf_header (
302
- IndexIVF *ivf, IOReader *f,
303
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
304
- {
305
- read_index_header (ivf, f);
306
- READ1 (ivf->nlist);
307
- READ1 (ivf->nprobe);
308
- ivf->quantizer = read_index (f);
395
+ static void read_ivf_header(
396
+ IndexIVF* ivf,
397
+ IOReader* f,
398
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
399
+ read_index_header(ivf, f);
400
+ READ1(ivf->nlist);
401
+ READ1(ivf->nprobe);
402
+ ivf->quantizer = read_index(f);
309
403
  ivf->own_fields = true;
310
404
  if (ids) { // used in legacy "Iv" formats
311
- ids->resize (ivf->nlist);
405
+ ids->resize(ivf->nlist);
312
406
  for (size_t i = 0; i < ivf->nlist; i++)
313
- READVECTOR ((*ids)[i]);
407
+ READVECTOR((*ids)[i]);
314
408
  }
315
- read_direct_map (&ivf->direct_map, f);
409
+ read_direct_map(&ivf->direct_map, f);
316
410
  }
317
411
 
318
412
  // used for legacy formats
319
- static ArrayInvertedLists *set_array_invlist(
320
- IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
321
- {
322
- ArrayInvertedLists *ail = new ArrayInvertedLists (
323
- ivf->nlist, ivf->code_size);
324
- std::swap (ail->ids, ids);
413
+ static ArrayInvertedLists* set_array_invlist(
414
+ IndexIVF* ivf,
415
+ std::vector<std::vector<Index::idx_t>>& ids) {
416
+ ArrayInvertedLists* ail =
417
+ new ArrayInvertedLists(ivf->nlist, ivf->code_size);
418
+ std::swap(ail->ids, ids);
325
419
  ivf->invlists = ail;
326
420
  ivf->own_invlists = true;
327
421
  return ail;
328
422
  }
329
423
 
330
- static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
331
- {
332
- bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
424
+ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
425
+ bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ");
333
426
 
334
- IndexIVFPQR *ivfpqr =
335
- h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
336
- new IndexIVFPQR () : nullptr;
337
- IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
427
+ IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR")
428
+ ? new IndexIVFPQR()
429
+ : nullptr;
430
+ IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
338
431
 
339
- std::vector<std::vector<Index::idx_t> > ids;
340
- read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
341
- READ1 (ivpq->by_residual);
342
- READ1 (ivpq->code_size);
343
- read_ProductQuantizer (&ivpq->pq, f);
432
+ std::vector<std::vector<Index::idx_t>> ids;
433
+ read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
434
+ READ1(ivpq->by_residual);
435
+ READ1(ivpq->code_size);
436
+ read_ProductQuantizer(&ivpq->pq, f);
344
437
 
345
438
  if (legacy) {
346
- ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
439
+ ArrayInvertedLists* ail = set_array_invlist(ivpq, ids);
347
440
  for (size_t i = 0; i < ail->nlist; i++)
348
- READVECTOR (ail->codes[i]);
441
+ READVECTOR(ail->codes[i]);
349
442
  } else {
350
- read_InvertedLists (ivpq, f, io_flags);
443
+ read_InvertedLists(ivpq, f, io_flags);
351
444
  }
352
445
 
353
446
  if (ivpq->is_trained) {
354
447
  // precomputed table not stored. It is cheaper to recompute it
355
448
  ivpq->use_precomputed_table = 0;
356
449
  if (ivpq->by_residual)
357
- ivpq->precompute_table ();
450
+ ivpq->precompute_table();
358
451
  if (ivfpqr) {
359
- read_ProductQuantizer (&ivfpqr->refine_pq, f);
360
- READVECTOR (ivfpqr->refine_codes);
361
- READ1 (ivfpqr->k_factor);
452
+ read_ProductQuantizer(&ivfpqr->refine_pq, f);
453
+ READVECTOR(ivfpqr->refine_codes);
454
+ READ1(ivfpqr->k_factor);
362
455
  }
363
456
  }
364
457
  return ivpq;
@@ -366,200 +459,248 @@ static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
366
459
 
367
460
  int read_old_fmt_hack = 0;
368
461
 
369
- Index *read_index (IOReader *f, int io_flags) {
370
- Index * idx = nullptr;
462
+ Index* read_index(IOReader* f, int io_flags) {
463
+ Index* idx = nullptr;
371
464
  uint32_t h;
372
- READ1 (h);
373
- if (h == fourcc ("IxFI") || h == fourcc ("IxF2") || h == fourcc("IxFl")) {
374
- IndexFlat *idxf;
375
- if (h == fourcc ("IxFI")) {
376
- idxf = new IndexFlatIP ();
465
+ READ1(h);
466
+ if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
467
+ IndexFlat* idxf;
468
+ if (h == fourcc("IxFI")) {
469
+ idxf = new IndexFlatIP();
377
470
  } else if (h == fourcc("IxF2")) {
378
- idxf = new IndexFlatL2 ();
471
+ idxf = new IndexFlatL2();
379
472
  } else {
380
- idxf = new IndexFlat ();
473
+ idxf = new IndexFlat();
381
474
  }
382
- read_index_header (idxf, f);
383
- READVECTOR (idxf->xb);
384
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
475
+ read_index_header(idxf, f);
476
+ idxf->code_size = idxf->d * sizeof(float);
477
+ READXBVECTOR(idxf->codes);
478
+ FAISS_THROW_IF_NOT(
479
+ idxf->codes.size() == idxf->ntotal * idxf->code_size);
385
480
  // leak!
386
481
  idx = idxf;
387
482
  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
388
- IndexLSH * idxl = new IndexLSH ();
389
- read_index_header (idxl, f);
390
- READ1 (idxl->nbits);
391
- READ1 (idxl->rotate_data);
392
- READ1 (idxl->train_thresholds);
393
- READVECTOR (idxl->thresholds);
394
- READ1 (idxl->bytes_per_vec);
483
+ IndexLSH* idxl = new IndexLSH();
484
+ read_index_header(idxl, f);
485
+ READ1(idxl->nbits);
486
+ READ1(idxl->rotate_data);
487
+ READ1(idxl->train_thresholds);
488
+ READVECTOR(idxl->thresholds);
489
+ int code_size_i;
490
+ READ1(code_size_i);
491
+ idxl->code_size = code_size_i;
395
492
  if (h == fourcc("IxHE")) {
396
- FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
397
- "can only read old format IndexLSH with "
398
- "nbits multiple of 64 (got %d)",
399
- (int) idxl->nbits);
493
+ FAISS_THROW_IF_NOT_FMT(
494
+ idxl->nbits % 64 == 0,
495
+ "can only read old format IndexLSH with "
496
+ "nbits multiple of 64 (got %d)",
497
+ (int)idxl->nbits);
400
498
  // leak
401
- idxl->bytes_per_vec *= 8;
499
+ idxl->code_size *= 8;
402
500
  }
403
501
  {
404
- RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
405
- (read_VectorTransform (f));
502
+ RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>(
503
+ read_VectorTransform(f));
406
504
  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
407
505
  idxl->rrot = *rrot;
408
506
  delete rrot;
409
507
  }
410
- READVECTOR (idxl->codes);
411
- FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
412
- idxl->rrot.d_out == idxl->nbits);
413
- FAISS_THROW_IF_NOT (
414
- idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
508
+ READVECTOR(idxl->codes);
509
+ FAISS_THROW_IF_NOT(
510
+ idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
511
+ FAISS_THROW_IF_NOT(
512
+ idxl->codes.size() == idxl->ntotal * idxl->code_size);
415
513
  idx = idxl;
416
- } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
417
- h == fourcc ("IxPq")) {
514
+ } else if (
515
+ h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) {
418
516
  // IxPQ and IxPo were merged into the same IndexPQ object
419
- IndexPQ * idxp =new IndexPQ ();
420
- read_index_header (idxp, f);
421
- read_ProductQuantizer (&idxp->pq, f);
422
- READVECTOR (idxp->codes);
423
- if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
424
- READ1 (idxp->search_type);
425
- READ1 (idxp->encode_signs);
426
- READ1 (idxp->polysemous_ht);
517
+ IndexPQ* idxp = new IndexPQ();
518
+ read_index_header(idxp, f);
519
+ read_ProductQuantizer(&idxp->pq, f);
520
+ idxp->code_size = idxp->pq.code_size;
521
+ READVECTOR(idxp->codes);
522
+ if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
523
+ READ1(idxp->search_type);
524
+ READ1(idxp->encode_signs);
525
+ READ1(idxp->polysemous_ht);
427
526
  }
428
527
  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
429
528
  // when they were in fact using L2. Therefore, we force metric type
430
529
  // to L2 when the old format is detected
431
- if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
530
+ if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
432
531
  idxp->metric_type = METRIC_L2;
433
532
  }
434
533
  idx = idxp;
435
- } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
436
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
437
- std::vector<std::vector<Index::idx_t> > ids;
438
- read_ivf_header (ivfl, f, &ids);
534
+ } else if (h == fourcc("IxRQ") || h == fourcc("IxRq")) {
535
+ IndexResidualQuantizer* idxr = new IndexResidualQuantizer();
536
+ read_index_header(idxr, f);
537
+ if (h == fourcc("IxRQ")) {
538
+ read_ResidualQuantizer_old(&idxr->rq, f);
539
+ } else {
540
+ read_ResidualQuantizer(&idxr->rq, f);
541
+ }
542
+ READ1(idxr->code_size);
543
+ READVECTOR(idxr->codes);
544
+ idx = idxr;
545
+ } else if (h == fourcc("IxLS")) {
546
+ auto idxr = new IndexLocalSearchQuantizer();
547
+ read_index_header(idxr, f);
548
+ read_LocalSearchQuantizer(&idxr->lsq, f);
549
+ READ1(idxr->code_size);
550
+ READVECTOR(idxr->codes);
551
+ idx = idxr;
552
+ } else if (h == fourcc("ImRQ")) {
553
+ ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
554
+ read_index_header(idxr, f);
555
+ read_ResidualQuantizer(&idxr->rq, f);
556
+ READ1(idxr->beam_factor);
557
+ idxr->set_beam_factor(idxr->beam_factor);
558
+ idx = idxr;
559
+ } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
560
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
561
+ std::vector<std::vector<Index::idx_t>> ids;
562
+ read_ivf_header(ivfl, f, &ids);
439
563
  ivfl->code_size = ivfl->d * sizeof(float);
440
- ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
564
+ ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
441
565
 
442
- if (h == fourcc ("IvFL")) {
566
+ if (h == fourcc("IvFL")) {
443
567
  for (size_t i = 0; i < ivfl->nlist; i++) {
444
- READVECTOR (ail->codes[i]);
568
+ READVECTOR(ail->codes[i]);
445
569
  }
446
570
  } else { // old format
447
571
  for (size_t i = 0; i < ivfl->nlist; i++) {
448
572
  std::vector<float> vec;
449
- READVECTOR (vec);
573
+ READVECTOR(vec);
450
574
  ail->codes[i].resize(vec.size() * sizeof(float));
451
- memcpy(ail->codes[i].data(), vec.data(),
452
- ail->codes[i].size());
575
+ memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size());
453
576
  }
454
577
  }
455
578
  idx = ivfl;
456
- } else if (h == fourcc ("IwFd")) {
457
- IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
458
- read_ivf_header (ivfl, f);
579
+ } else if (h == fourcc("IwFd")) {
580
+ IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup();
581
+ read_ivf_header(ivfl, f);
459
582
  ivfl->code_size = ivfl->d * sizeof(float);
460
583
  {
461
584
  std::vector<Index::idx_t> tab;
462
- READVECTOR (tab);
585
+ READVECTOR(tab);
463
586
  for (long i = 0; i < tab.size(); i += 2) {
464
- std::pair<Index::idx_t, Index::idx_t>
465
- pair (tab[i], tab[i + 1]);
466
- ivfl->instances.insert (pair);
587
+ std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
588
+ ivfl->instances.insert(pair);
467
589
  }
468
590
  }
469
- read_InvertedLists (ivfl, f, io_flags);
591
+ read_InvertedLists(ivfl, f, io_flags);
470
592
  idx = ivfl;
471
- } else if (h == fourcc ("IwFl")) {
472
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
473
- read_ivf_header (ivfl, f);
593
+ } else if (h == fourcc("IwFl")) {
594
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
595
+ read_ivf_header(ivfl, f);
474
596
  ivfl->code_size = ivfl->d * sizeof(float);
475
- read_InvertedLists (ivfl, f, io_flags);
597
+ read_InvertedLists(ivfl, f, io_flags);
476
598
  idx = ivfl;
477
- } else if (h == fourcc ("IxSQ")) {
478
- IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
479
- read_index_header (idxs, f);
480
- read_ScalarQuantizer (&idxs->sq, f);
481
- READVECTOR (idxs->codes);
599
+ } else if (h == fourcc("IxSQ")) {
600
+ IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
601
+ read_index_header(idxs, f);
602
+ read_ScalarQuantizer(&idxs->sq, f);
603
+ READVECTOR(idxs->codes);
482
604
  idxs->code_size = idxs->sq.code_size;
483
605
  idx = idxs;
484
- } else if (h == fourcc ("IxLa")) {
606
+ } else if (h == fourcc("IxLa")) {
485
607
  int d, nsq, scale_nbit, r2;
486
- READ1 (d);
487
- READ1 (nsq);
488
- READ1 (scale_nbit);
489
- READ1 (r2);
490
- IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2);
491
- read_index_header (idxl, f);
492
- READVECTOR (idxl->trained);
608
+ READ1(d);
609
+ READ1(nsq);
610
+ READ1(scale_nbit);
611
+ READ1(r2);
612
+ IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2);
613
+ read_index_header(idxl, f);
614
+ READVECTOR(idxl->trained);
493
615
  idx = idxl;
494
- } else if(h == fourcc ("IvSQ")) { // legacy
495
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
496
- std::vector<std::vector<Index::idx_t> > ids;
497
- read_ivf_header (ivsc, f, &ids);
498
- read_ScalarQuantizer (&ivsc->sq, f);
499
- READ1 (ivsc->code_size);
500
- ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
501
- for(int i = 0; i < ivsc->nlist; i++)
502
- READVECTOR (ail->codes[i]);
616
+ } else if (h == fourcc("IvSQ")) { // legacy
617
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
618
+ std::vector<std::vector<Index::idx_t>> ids;
619
+ read_ivf_header(ivsc, f, &ids);
620
+ read_ScalarQuantizer(&ivsc->sq, f);
621
+ READ1(ivsc->code_size);
622
+ ArrayInvertedLists* ail = set_array_invlist(ivsc, ids);
623
+ for (int i = 0; i < ivsc->nlist; i++)
624
+ READVECTOR(ail->codes[i]);
503
625
  idx = ivsc;
504
- } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
505
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
506
- read_ivf_header (ivsc, f);
507
- read_ScalarQuantizer (&ivsc->sq, f);
508
- READ1 (ivsc->code_size);
509
- if (h == fourcc ("IwSQ")) {
626
+ } else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) {
627
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
628
+ read_ivf_header(ivsc, f);
629
+ read_ScalarQuantizer(&ivsc->sq, f);
630
+ READ1(ivsc->code_size);
631
+ if (h == fourcc("IwSQ")) {
510
632
  ivsc->by_residual = true;
511
633
  } else {
512
- READ1 (ivsc->by_residual);
634
+ READ1(ivsc->by_residual);
513
635
  }
514
- read_InvertedLists (ivsc, f, io_flags);
636
+ read_InvertedLists(ivsc, f, io_flags);
515
637
  idx = ivsc;
516
- } else if(h == fourcc ("IwSh")) {
517
- IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
518
- read_ivf_header (ivsp, f);
519
- ivsp->vt = read_VectorTransform (f);
638
+ } else if (h == fourcc("IwLS") || h == fourcc("IwRQ")) {
639
+ bool is_LSQ = h == fourcc("IwLS");
640
+ IndexIVFAdditiveQuantizer* iva;
641
+ if (is_LSQ) {
642
+ iva = new IndexIVFLocalSearchQuantizer();
643
+ } else {
644
+ iva = new IndexIVFResidualQuantizer();
645
+ }
646
+ read_ivf_header(iva, f);
647
+ READ1(iva->code_size);
648
+ if (is_LSQ) {
649
+ read_LocalSearchQuantizer((LocalSearchQuantizer*)iva->aq, f);
650
+ } else {
651
+ read_ResidualQuantizer((ResidualQuantizer*)iva->aq, f);
652
+ }
653
+ READ1(iva->by_residual);
654
+ READ1(iva->use_precomputed_table);
655
+ read_InvertedLists(iva, f, io_flags);
656
+ idx = iva;
657
+ } else if (h == fourcc("IwSh")) {
658
+ IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash();
659
+ read_ivf_header(ivsp, f);
660
+ ivsp->vt = read_VectorTransform(f);
520
661
  ivsp->own_fields = true;
521
- READ1 (ivsp->nbit);
662
+ READ1(ivsp->nbit);
522
663
  // not stored by write_ivf_header
523
664
  ivsp->code_size = (ivsp->nbit + 7) / 8;
524
- READ1 (ivsp->period);
525
- READ1 (ivsp->threshold_type);
526
- READVECTOR (ivsp->trained);
527
- read_InvertedLists (ivsp, f, io_flags);
665
+ READ1(ivsp->period);
666
+ READ1(ivsp->threshold_type);
667
+ READVECTOR(ivsp->trained);
668
+ read_InvertedLists(ivsp, f, io_flags);
528
669
  idx = ivsp;
529
- } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
530
- h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {
670
+ } else if (
671
+ h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
672
+ h == fourcc("IwQR")) {
673
+ idx = read_ivfpq(f, h, io_flags);
531
674
 
532
- idx = read_ivfpq (f, h, io_flags);
533
-
534
- } else if(h == fourcc ("IxPT")) {
535
- IndexPreTransform * ixpt = new IndexPreTransform();
675
+ } else if (h == fourcc("IxPT")) {
676
+ IndexPreTransform* ixpt = new IndexPreTransform();
536
677
  ixpt->own_fields = true;
537
- read_index_header (ixpt, f);
678
+ read_index_header(ixpt, f);
538
679
  int nt;
539
680
  if (read_old_fmt_hack == 2) {
540
681
  nt = 1;
541
682
  } else {
542
- READ1 (nt);
683
+ READ1(nt);
543
684
  }
544
685
  for (int i = 0; i < nt; i++) {
545
- ixpt->chain.push_back (read_VectorTransform (f));
686
+ ixpt->chain.push_back(read_VectorTransform(f));
546
687
  }
547
- ixpt->index = read_index (f, io_flags);
688
+ ixpt->index = read_index(f, io_flags);
548
689
  idx = ixpt;
549
- } else if(h == fourcc ("Imiq")) {
550
- MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
551
- read_index_header (imiq, f);
552
- read_ProductQuantizer (&imiq->pq, f);
690
+ } else if (h == fourcc("Imiq")) {
691
+ MultiIndexQuantizer* imiq = new MultiIndexQuantizer();
692
+ read_index_header(imiq, f);
693
+ read_ProductQuantizer(&imiq->pq, f);
553
694
  idx = imiq;
554
- } else if(h == fourcc ("IxRF")) {
555
- IndexRefine *idxrf = new IndexRefine ();
556
- read_index_header (idxrf, f);
695
+ } else if (h == fourcc("IxRF")) {
696
+ IndexRefine* idxrf = new IndexRefine();
697
+ read_index_header(idxrf, f);
557
698
  idxrf->base_index = read_index(f, io_flags);
558
699
  idxrf->refine_index = read_index(f, io_flags);
559
- READ1 (idxrf->k_factor);
700
+ READ1(idxrf->k_factor);
560
701
  if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
561
702
  // then make a RefineFlat with it
562
- IndexRefine *idxrf_old = idxrf;
703
+ IndexRefine* idxrf_old = idxrf;
563
704
  idxrf = new IndexRefineFlat();
564
705
  *idxrf = *idxrf_old;
565
706
  delete idxrf_old;
@@ -567,248 +708,260 @@ Index *read_index (IOReader *f, int io_flags) {
567
708
  idxrf->own_fields = true;
568
709
  idxrf->own_refine_index = true;
569
710
  idx = idxrf;
570
- } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
571
- bool is_map2 = h == fourcc ("IxM2");
572
- IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
573
- read_index_header (idxmap, f);
574
- idxmap->index = read_index (f, io_flags);
711
+ } else if (h == fourcc("IxMp") || h == fourcc("IxM2")) {
712
+ bool is_map2 = h == fourcc("IxM2");
713
+ IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap();
714
+ read_index_header(idxmap, f);
715
+ idxmap->index = read_index(f, io_flags);
575
716
  idxmap->own_fields = true;
576
- READVECTOR (idxmap->id_map);
717
+ READVECTOR(idxmap->id_map);
577
718
  if (is_map2) {
578
- static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
719
+ static_cast<IndexIDMap2*>(idxmap)->construct_rev_map();
579
720
  }
580
721
  idx = idxmap;
581
- } else if (h == fourcc ("Ix2L")) {
582
- Index2Layer * idxp = new Index2Layer ();
583
- read_index_header (idxp, f);
584
- idxp->q1.quantizer = read_index (f, io_flags);
585
- READ1 (idxp->q1.nlist);
586
- READ1 (idxp->q1.quantizer_trains_alone);
587
- read_ProductQuantizer (&idxp->pq, f);
588
- READ1 (idxp->code_size_1);
589
- READ1 (idxp->code_size_2);
590
- READ1 (idxp->code_size);
591
- READVECTOR (idxp->codes);
722
+ } else if (h == fourcc("Ix2L")) {
723
+ Index2Layer* idxp = new Index2Layer();
724
+ read_index_header(idxp, f);
725
+ idxp->q1.quantizer = read_index(f, io_flags);
726
+ READ1(idxp->q1.nlist);
727
+ READ1(idxp->q1.quantizer_trains_alone);
728
+ read_ProductQuantizer(&idxp->pq, f);
729
+ READ1(idxp->code_size_1);
730
+ READ1(idxp->code_size_2);
731
+ READ1(idxp->code_size);
732
+ READVECTOR(idxp->codes);
592
733
  idx = idxp;
593
- } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
594
- h == fourcc("IHNs") || h == fourcc("IHN2")) {
595
- IndexHNSW *idxhnsw = nullptr;
596
- if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
597
- if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
598
- if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
599
- if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
600
- read_index_header (idxhnsw, f);
601
- read_HNSW (&idxhnsw->hnsw, f);
602
- idxhnsw->storage = read_index (f, io_flags);
734
+ } else if (
735
+ h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
736
+ h == fourcc("IHN2")) {
737
+ IndexHNSW* idxhnsw = nullptr;
738
+ if (h == fourcc("IHNf"))
739
+ idxhnsw = new IndexHNSWFlat();
740
+ if (h == fourcc("IHNp"))
741
+ idxhnsw = new IndexHNSWPQ();
742
+ if (h == fourcc("IHNs"))
743
+ idxhnsw = new IndexHNSWSQ();
744
+ if (h == fourcc("IHN2"))
745
+ idxhnsw = new IndexHNSW2Level();
746
+ read_index_header(idxhnsw, f);
747
+ read_HNSW(&idxhnsw->hnsw, f);
748
+ idxhnsw->storage = read_index(f, io_flags);
603
749
  idxhnsw->own_fields = true;
604
750
  if (h == fourcc("IHNp")) {
605
- dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
751
+ dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
606
752
  }
607
753
  idx = idxhnsw;
608
- } else if(h == fourcc("IPfs")) {
609
- IndexPQFastScan *idxpqfs = new IndexPQFastScan();
610
- read_index_header (idxpqfs, f);
611
- read_ProductQuantizer (&idxpqfs->pq, f);
612
- READ1 (idxpqfs->implem);
613
- READ1 (idxpqfs->bbs);
614
- READ1 (idxpqfs->qbs);
615
- READ1 (idxpqfs->ntotal2);
616
- READ1 (idxpqfs->M2);
617
- READVECTOR (idxpqfs->codes);
754
+ } else if (h == fourcc("INSf")) {
755
+ IndexNSG* idxnsg = new IndexNSGFlat();
756
+ read_index_header(idxnsg, f);
757
+ READ1(idxnsg->GK);
758
+ READ1(idxnsg->build_type);
759
+ READ1(idxnsg->nndescent_S);
760
+ READ1(idxnsg->nndescent_R);
761
+ READ1(idxnsg->nndescent_L);
762
+ READ1(idxnsg->nndescent_iter);
763
+ read_NSG(&idxnsg->nsg, f);
764
+ idxnsg->storage = read_index(f, io_flags);
765
+ idxnsg->own_fields = true;
766
+ idx = idxnsg;
767
+ } else if (h == fourcc("IPfs")) {
768
+ IndexPQFastScan* idxpqfs = new IndexPQFastScan();
769
+ read_index_header(idxpqfs, f);
770
+ read_ProductQuantizer(&idxpqfs->pq, f);
771
+ READ1(idxpqfs->implem);
772
+ READ1(idxpqfs->bbs);
773
+ READ1(idxpqfs->qbs);
774
+ READ1(idxpqfs->ntotal2);
775
+ READ1(idxpqfs->M2);
776
+ READVECTOR(idxpqfs->codes);
618
777
  idx = idxpqfs;
619
778
 
620
779
  } else if (h == fourcc("IwPf")) {
621
- IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
622
- read_ivf_header (ivpq, f);
623
- READ1 (ivpq->by_residual);
624
- READ1 (ivpq->code_size);
625
- READ1 (ivpq->bbs);
626
- READ1 (ivpq->M2);
627
- READ1 (ivpq->implem);
628
- READ1 (ivpq->qbs2);
629
- read_ProductQuantizer (&ivpq->pq, f);
630
- read_InvertedLists (ivpq, f, io_flags);
780
+ IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan();
781
+ read_ivf_header(ivpq, f);
782
+ READ1(ivpq->by_residual);
783
+ READ1(ivpq->code_size);
784
+ READ1(ivpq->bbs);
785
+ READ1(ivpq->M2);
786
+ READ1(ivpq->implem);
787
+ READ1(ivpq->qbs2);
788
+ read_ProductQuantizer(&ivpq->pq, f);
789
+ read_InvertedLists(ivpq, f, io_flags);
631
790
  ivpq->precompute_table();
632
791
  idx = ivpq;
633
792
  } else {
634
793
  FAISS_THROW_FMT(
635
- "Index type 0x%08x (\"%s\") not recognized",
636
- h, fourcc_inv_printable(h).c_str()
637
- );
794
+ "Index type 0x%08x (\"%s\") not recognized",
795
+ h,
796
+ fourcc_inv_printable(h).c_str());
638
797
  idx = nullptr;
639
798
  }
640
799
  return idx;
641
800
  }
642
801
 
643
-
644
- Index *read_index (FILE * f, int io_flags) {
802
+ Index* read_index(FILE* f, int io_flags) {
645
803
  FileIOReader reader(f);
646
804
  return read_index(&reader, io_flags);
647
805
  }
648
806
 
649
- Index *read_index (const char *fname, int io_flags) {
807
+ Index* read_index(const char* fname, int io_flags) {
650
808
  FileIOReader reader(fname);
651
- Index *idx = read_index (&reader, io_flags);
809
+ Index* idx = read_index(&reader, io_flags);
652
810
  return idx;
653
811
  }
654
812
 
655
- VectorTransform *read_VectorTransform (const char *fname) {
813
+ VectorTransform* read_VectorTransform(const char* fname) {
656
814
  FileIOReader reader(fname);
657
- VectorTransform *vt = read_VectorTransform (&reader);
815
+ VectorTransform* vt = read_VectorTransform(&reader);
658
816
  return vt;
659
817
  }
660
818
 
661
-
662
-
663
819
  /*************************************************************
664
820
  * Read binary indexes
665
821
  **************************************************************/
666
822
 
667
- static void read_InvertedLists (
668
- IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
669
- InvertedLists *ils = read_InvertedLists (f, io_flags);
670
- FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
671
- ils->code_size == ivf->code_size));
823
+ static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) {
824
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
825
+ FAISS_THROW_IF_NOT(
826
+ !ils ||
827
+ (ils->nlist == ivf->nlist && ils->code_size == ivf->code_size));
672
828
  ivf->invlists = ils;
673
829
  ivf->own_invlists = true;
674
830
  }
675
831
 
676
-
677
-
678
- static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
679
- READ1 (idx->d);
680
- READ1 (idx->code_size);
681
- READ1 (idx->ntotal);
682
- READ1 (idx->is_trained);
683
- READ1 (idx->metric_type);
832
+ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
833
+ READ1(idx->d);
834
+ READ1(idx->code_size);
835
+ READ1(idx->ntotal);
836
+ READ1(idx->is_trained);
837
+ READ1(idx->metric_type);
684
838
  idx->verbose = false;
685
839
  }
686
840
 
687
- static void read_binary_ivf_header (
688
- IndexBinaryIVF *ivf, IOReader *f,
689
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
690
- {
691
- read_index_binary_header (ivf, f);
692
- READ1 (ivf->nlist);
693
- READ1 (ivf->nprobe);
694
- ivf->quantizer = read_index_binary (f);
841
+ static void read_binary_ivf_header(
842
+ IndexBinaryIVF* ivf,
843
+ IOReader* f,
844
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
845
+ read_index_binary_header(ivf, f);
846
+ READ1(ivf->nlist);
847
+ READ1(ivf->nprobe);
848
+ ivf->quantizer = read_index_binary(f);
695
849
  ivf->own_fields = true;
696
850
  if (ids) { // used in legacy "Iv" formats
697
- ids->resize (ivf->nlist);
851
+ ids->resize(ivf->nlist);
698
852
  for (size_t i = 0; i < ivf->nlist; i++)
699
- READVECTOR ((*ids)[i]);
853
+ READVECTOR((*ids)[i]);
700
854
  }
701
- read_direct_map (&ivf->direct_map, f);
855
+ read_direct_map(&ivf->direct_map, f);
702
856
  }
703
857
 
704
- static void read_binary_hash_invlists (
705
- IndexBinaryHash::InvertedListMap &invlists,
706
- int b, IOReader *f)
707
- {
858
+ static void read_binary_hash_invlists(
859
+ IndexBinaryHash::InvertedListMap& invlists,
860
+ int b,
861
+ IOReader* f) {
708
862
  size_t sz;
709
- READ1 (sz);
863
+ READ1(sz);
710
864
  int il_nbit = 0;
711
- READ1 (il_nbit);
865
+ READ1(il_nbit);
712
866
  // buffer for bitstrings
713
867
  std::vector<uint8_t> buf((b + il_nbit) * sz);
714
- READVECTOR (buf);
715
- BitstringReader rd (buf.data(), buf.size());
716
- invlists.reserve (sz);
868
+ READVECTOR(buf);
869
+ BitstringReader rd(buf.data(), buf.size());
870
+ invlists.reserve(sz);
717
871
  for (size_t i = 0; i < sz; i++) {
718
872
  uint64_t hash = rd.read(b);
719
873
  uint64_t ilsz = rd.read(il_nbit);
720
- auto & il = invlists[hash];
721
- READVECTOR (il.ids);
722
- FAISS_THROW_IF_NOT (il.ids.size() == ilsz);
723
- READVECTOR (il.vecs);
874
+ auto& il = invlists[hash];
875
+ READVECTOR(il.ids);
876
+ FAISS_THROW_IF_NOT(il.ids.size() == ilsz);
877
+ READVECTOR(il.vecs);
724
878
  }
725
879
  }
726
880
 
727
881
  static void read_binary_multi_hash_map(
728
- IndexBinaryMultiHash::Map &map,
729
- int b, size_t ntotal,
730
- IOReader *f)
731
- {
882
+ IndexBinaryMultiHash::Map& map,
883
+ int b,
884
+ size_t ntotal,
885
+ IOReader* f) {
732
886
  int id_bits;
733
887
  size_t sz;
734
- READ1 (id_bits);
735
- READ1 (sz);
888
+ READ1(id_bits);
889
+ READ1(sz);
736
890
  std::vector<uint8_t> buf;
737
- READVECTOR (buf);
891
+ READVECTOR(buf);
738
892
  size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
739
- FAISS_THROW_IF_NOT (buf.size() == (nbit + 7) / 8);
740
- BitstringReader rd (buf.data(), buf.size());
741
- map.reserve (sz);
893
+ FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8);
894
+ BitstringReader rd(buf.data(), buf.size());
895
+ map.reserve(sz);
742
896
  for (size_t i = 0; i < sz; i++) {
743
897
  uint64_t hash = rd.read(b);
744
898
  uint64_t ilsz = rd.read(id_bits);
745
- auto & il = map[hash];
899
+ auto& il = map[hash];
746
900
  for (size_t j = 0; j < ilsz; j++) {
747
- il.push_back (rd.read (id_bits));
901
+ il.push_back(rd.read(id_bits));
748
902
  }
749
903
  }
750
904
  }
751
905
 
752
-
753
-
754
- IndexBinary *read_index_binary (IOReader *f, int io_flags) {
755
- IndexBinary * idx = nullptr;
906
+ IndexBinary* read_index_binary(IOReader* f, int io_flags) {
907
+ IndexBinary* idx = nullptr;
756
908
  uint32_t h;
757
- READ1 (h);
758
- if (h == fourcc ("IBxF")) {
759
- IndexBinaryFlat *idxf = new IndexBinaryFlat ();
760
- read_index_binary_header (idxf, f);
761
- READVECTOR (idxf->xb);
762
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
909
+ READ1(h);
910
+ if (h == fourcc("IBxF")) {
911
+ IndexBinaryFlat* idxf = new IndexBinaryFlat();
912
+ read_index_binary_header(idxf, f);
913
+ READVECTOR(idxf->xb);
914
+ FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size);
763
915
  // leak!
764
916
  idx = idxf;
765
- } else if (h == fourcc ("IBwF")) {
766
- IndexBinaryIVF *ivf = new IndexBinaryIVF ();
767
- read_binary_ivf_header (ivf, f);
768
- read_InvertedLists (ivf, f, io_flags);
917
+ } else if (h == fourcc("IBwF")) {
918
+ IndexBinaryIVF* ivf = new IndexBinaryIVF();
919
+ read_binary_ivf_header(ivf, f);
920
+ read_InvertedLists(ivf, f, io_flags);
769
921
  idx = ivf;
770
- } else if (h == fourcc ("IBFf")) {
771
- IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat ();
772
- read_index_binary_header (idxff, f);
922
+ } else if (h == fourcc("IBFf")) {
923
+ IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat();
924
+ read_index_binary_header(idxff, f);
773
925
  idxff->own_fields = true;
774
- idxff->index = read_index (f, io_flags);
926
+ idxff->index = read_index(f, io_flags);
775
927
  idx = idxff;
776
- } else if (h == fourcc ("IBHf")) {
777
- IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW ();
778
- read_index_binary_header (idxhnsw, f);
779
- read_HNSW (&idxhnsw->hnsw, f);
780
- idxhnsw->storage = read_index_binary (f, io_flags);
928
+ } else if (h == fourcc("IBHf")) {
929
+ IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW();
930
+ read_index_binary_header(idxhnsw, f);
931
+ read_HNSW(&idxhnsw->hnsw, f);
932
+ idxhnsw->storage = read_index_binary(f, io_flags);
781
933
  idxhnsw->own_fields = true;
782
934
  idx = idxhnsw;
783
- } else if(h == fourcc ("IBMp") || h == fourcc ("IBM2")) {
784
- bool is_map2 = h == fourcc ("IBM2");
785
- IndexBinaryIDMap * idxmap = is_map2 ?
786
- new IndexBinaryIDMap2 () : new IndexBinaryIDMap ();
787
- read_index_binary_header (idxmap, f);
788
- idxmap->index = read_index_binary (f, io_flags);
935
+ } else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
936
+ bool is_map2 = h == fourcc("IBM2");
937
+ IndexBinaryIDMap* idxmap =
938
+ is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap();
939
+ read_index_binary_header(idxmap, f);
940
+ idxmap->index = read_index_binary(f, io_flags);
789
941
  idxmap->own_fields = true;
790
- READVECTOR (idxmap->id_map);
942
+ READVECTOR(idxmap->id_map);
791
943
  if (is_map2) {
792
- static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map ();
944
+ static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map();
793
945
  }
794
946
  idx = idxmap;
795
- } else if(h == fourcc("IBHh")) {
796
- IndexBinaryHash *idxh = new IndexBinaryHash ();
797
- read_index_binary_header (idxh, f);
798
- READ1 (idxh->b);
799
- READ1 (idxh->nflip);
947
+ } else if (h == fourcc("IBHh")) {
948
+ IndexBinaryHash* idxh = new IndexBinaryHash();
949
+ read_index_binary_header(idxh, f);
950
+ READ1(idxh->b);
951
+ READ1(idxh->nflip);
800
952
  read_binary_hash_invlists(idxh->invlists, idxh->b, f);
801
953
  idx = idxh;
802
- } else if(h == fourcc("IBHm")) {
803
- IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash ();
804
- read_index_binary_header (idxmh, f);
805
- idxmh->storage = dynamic_cast<IndexBinaryFlat*> (read_index_binary (f));
806
- FAISS_THROW_IF_NOT(idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
954
+ } else if (h == fourcc("IBHm")) {
955
+ IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash();
956
+ read_index_binary_header(idxmh, f);
957
+ idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f));
958
+ FAISS_THROW_IF_NOT(
959
+ idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
807
960
  idxmh->own_fields = true;
808
- READ1 (idxmh->b);
809
- READ1 (idxmh->nhash);
810
- READ1 (idxmh->nflip);
811
- idxmh->maps.resize (idxmh->nhash);
961
+ READ1(idxmh->b);
962
+ READ1(idxmh->nhash);
963
+ READ1(idxmh->nflip);
964
+ idxmh->maps.resize(idxmh->nhash);
812
965
  for (int i = 0; i < idxmh->nhash; i++) {
813
966
  read_binary_multi_hash_map(
814
967
  idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
@@ -816,25 +969,23 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
816
969
  idx = idxmh;
817
970
  } else {
818
971
  FAISS_THROW_FMT(
819
- "Index type %08x (\"%s\") not recognized",
820
- h, fourcc_inv_printable(h).c_str()
821
- );
972
+ "Index type %08x (\"%s\") not recognized",
973
+ h,
974
+ fourcc_inv_printable(h).c_str());
822
975
  idx = nullptr;
823
976
  }
824
977
  return idx;
825
978
  }
826
979
 
827
- IndexBinary *read_index_binary (FILE * f, int io_flags) {
980
+ IndexBinary* read_index_binary(FILE* f, int io_flags) {
828
981
  FileIOReader reader(f);
829
982
  return read_index_binary(&reader, io_flags);
830
983
  }
831
984
 
832
- IndexBinary *read_index_binary (const char *fname, int io_flags) {
985
+ IndexBinary* read_index_binary(const char* fname, int io_flags) {
833
986
  FileIOReader reader(fname);
834
- IndexBinary *idx = read_index_binary (&reader, io_flags);
987
+ IndexBinary* idx = read_index_binary(&reader, io_flags);
835
988
  return idx;
836
989
  }
837
990
 
838
-
839
-
840
991
  } // namespace faiss