faiss 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -9,11 +9,14 @@
9
9
 
10
10
  #include <faiss/index_io.h>
11
11
 
12
+ #include <faiss/impl/io.h>
13
+ #include <faiss/impl/io_macros.h>
14
+
12
15
  #include <cstdio>
13
16
  #include <cstdlib>
14
17
 
15
- #include <sys/types.h>
16
18
  #include <sys/stat.h>
19
+ #include <sys/types.h>
17
20
 
18
21
  #include <faiss/invlists/InvertedListsIOHook.h>
19
22
 
@@ -22,30 +25,32 @@
22
25
  #include <faiss/impl/io_macros.h>
23
26
  #include <faiss/utils/hamming.h>
24
27
 
28
+ #include <faiss/Index2Layer.h>
25
29
  #include <faiss/IndexFlat.h>
26
- #include <faiss/VectorTransform.h>
27
- #include <faiss/IndexPreTransform.h>
28
- #include <faiss/IndexLSH.h>
29
- #include <faiss/IndexPQ.h>
30
+ #include <faiss/IndexHNSW.h>
30
31
  #include <faiss/IndexIVF.h>
32
+ #include <faiss/IndexIVFFlat.h>
31
33
  #include <faiss/IndexIVFPQ.h>
34
+ #include <faiss/IndexIVFPQFastScan.h>
32
35
  #include <faiss/IndexIVFPQR.h>
33
- #include <faiss/Index2Layer.h>
34
- #include <faiss/IndexIVFFlat.h>
35
36
  #include <faiss/IndexIVFSpectralHash.h>
36
- #include <faiss/MetaIndexes.h>
37
- #include <faiss/IndexScalarQuantizer.h>
38
- #include <faiss/IndexHNSW.h>
37
+ #include <faiss/IndexLSH.h>
39
38
  #include <faiss/IndexLattice.h>
39
+ #include <faiss/IndexNSG.h>
40
+ #include <faiss/IndexPQ.h>
40
41
  #include <faiss/IndexPQFastScan.h>
41
- #include <faiss/IndexIVFPQFastScan.h>
42
+ #include <faiss/IndexPreTransform.h>
42
43
  #include <faiss/IndexRefine.h>
44
+ #include <faiss/IndexResidual.h>
45
+ #include <faiss/IndexScalarQuantizer.h>
46
+ #include <faiss/MetaIndexes.h>
47
+ #include <faiss/VectorTransform.h>
43
48
 
44
49
  #include <faiss/IndexBinaryFlat.h>
45
50
  #include <faiss/IndexBinaryFromFloat.h>
46
51
  #include <faiss/IndexBinaryHNSW.h>
47
- #include <faiss/IndexBinaryIVF.h>
48
52
  #include <faiss/IndexBinaryHash.h>
53
+ #include <faiss/IndexBinaryIVF.h>
49
54
 
50
55
  /*************************************************************
51
56
  * The I/O format is the content of the class. For objects that are
@@ -68,112 +73,120 @@
68
73
 
69
74
  namespace faiss {
70
75
 
71
-
72
76
  /*************************************************************
73
77
  * Write
74
78
  **************************************************************/
75
- static void write_index_header (const Index *idx, IOWriter *f) {
76
- WRITE1 (idx->d);
77
- WRITE1 (idx->ntotal);
79
+ static void write_index_header(const Index* idx, IOWriter* f) {
80
+ WRITE1(idx->d);
81
+ WRITE1(idx->ntotal);
78
82
  Index::idx_t dummy = 1 << 20;
79
- WRITE1 (dummy);
80
- WRITE1 (dummy);
81
- WRITE1 (idx->is_trained);
82
- WRITE1 (idx->metric_type);
83
+ WRITE1(dummy);
84
+ WRITE1(dummy);
85
+ WRITE1(idx->is_trained);
86
+ WRITE1(idx->metric_type);
83
87
  if (idx->metric_type > 1) {
84
- WRITE1 (idx->metric_arg);
88
+ WRITE1(idx->metric_arg);
85
89
  }
86
90
  }
87
91
 
88
- void write_VectorTransform (const VectorTransform *vt, IOWriter *f) {
89
- if (const LinearTransform * lt =
90
- dynamic_cast < const LinearTransform *> (vt)) {
91
- if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
92
- uint32_t h = fourcc ("rrot");
93
- WRITE1 (h);
94
- } else if (const PCAMatrix * pca =
95
- dynamic_cast<const PCAMatrix *>(lt)) {
96
- uint32_t h = fourcc ("PcAm");
97
- WRITE1 (h);
98
- WRITE1 (pca->eigen_power);
99
- WRITE1 (pca->random_rotation);
100
- WRITE1 (pca->balanced_bins);
101
- WRITEVECTOR (pca->mean);
102
- WRITEVECTOR (pca->eigenvalues);
103
- WRITEVECTOR (pca->PCAMat);
104
- } else if (const ITQMatrix * itqm =
105
- dynamic_cast<const ITQMatrix *>(lt)) {
106
- uint32_t h = fourcc ("Viqm");
107
- WRITE1 (h);
108
- WRITE1 (itqm->max_iter);
109
- WRITE1 (itqm->seed);
92
+ void write_VectorTransform(const VectorTransform* vt, IOWriter* f) {
93
+ if (const LinearTransform* lt = dynamic_cast<const LinearTransform*>(vt)) {
94
+ if (dynamic_cast<const RandomRotationMatrix*>(lt)) {
95
+ uint32_t h = fourcc("rrot");
96
+ WRITE1(h);
97
+ } else if (const PCAMatrix* pca = dynamic_cast<const PCAMatrix*>(lt)) {
98
+ uint32_t h = fourcc("PcAm");
99
+ WRITE1(h);
100
+ WRITE1(pca->eigen_power);
101
+ WRITE1(pca->random_rotation);
102
+ WRITE1(pca->balanced_bins);
103
+ WRITEVECTOR(pca->mean);
104
+ WRITEVECTOR(pca->eigenvalues);
105
+ WRITEVECTOR(pca->PCAMat);
106
+ } else if (const ITQMatrix* itqm = dynamic_cast<const ITQMatrix*>(lt)) {
107
+ uint32_t h = fourcc("Viqm");
108
+ WRITE1(h);
109
+ WRITE1(itqm->max_iter);
110
+ WRITE1(itqm->seed);
110
111
  } else {
111
112
  // generic LinearTransform (includes OPQ)
112
- uint32_t h = fourcc ("LTra");
113
- WRITE1 (h);
113
+ uint32_t h = fourcc("LTra");
114
+ WRITE1(h);
114
115
  }
115
- WRITE1 (lt->have_bias);
116
- WRITEVECTOR (lt->A);
117
- WRITEVECTOR (lt->b);
118
- } else if (const RemapDimensionsTransform *rdt =
119
- dynamic_cast<const RemapDimensionsTransform *>(vt)) {
120
- uint32_t h = fourcc ("RmDT");
121
- WRITE1 (h);
122
- WRITEVECTOR (rdt->map);
123
- } else if (const NormalizationTransform *nt =
124
- dynamic_cast<const NormalizationTransform *>(vt)) {
125
- uint32_t h = fourcc ("VNrm");
126
- WRITE1 (h);
127
- WRITE1 (nt->norm);
128
- } else if (const CenteringTransform *ct =
129
- dynamic_cast<const CenteringTransform *>(vt)) {
130
- uint32_t h = fourcc ("VCnt");
131
- WRITE1 (h);
132
- WRITEVECTOR (ct->mean);
133
- } else if (const ITQTransform *itqt =
134
- dynamic_cast<const ITQTransform*> (vt)) {
135
- uint32_t h = fourcc ("Viqt");
136
- WRITE1 (h);
137
- WRITEVECTOR (itqt->mean);
138
- WRITE1 (itqt->do_pca);
139
- write_VectorTransform (&itqt->itq, f);
140
- write_VectorTransform (&itqt->pca_then_itq, f);
116
+ WRITE1(lt->have_bias);
117
+ WRITEVECTOR(lt->A);
118
+ WRITEVECTOR(lt->b);
119
+ } else if (
120
+ const RemapDimensionsTransform* rdt =
121
+ dynamic_cast<const RemapDimensionsTransform*>(vt)) {
122
+ uint32_t h = fourcc("RmDT");
123
+ WRITE1(h);
124
+ WRITEVECTOR(rdt->map);
125
+ } else if (
126
+ const NormalizationTransform* nt =
127
+ dynamic_cast<const NormalizationTransform*>(vt)) {
128
+ uint32_t h = fourcc("VNrm");
129
+ WRITE1(h);
130
+ WRITE1(nt->norm);
131
+ } else if (
132
+ const CenteringTransform* ct =
133
+ dynamic_cast<const CenteringTransform*>(vt)) {
134
+ uint32_t h = fourcc("VCnt");
135
+ WRITE1(h);
136
+ WRITEVECTOR(ct->mean);
137
+ } else if (
138
+ const ITQTransform* itqt = dynamic_cast<const ITQTransform*>(vt)) {
139
+ uint32_t h = fourcc("Viqt");
140
+ WRITE1(h);
141
+ WRITEVECTOR(itqt->mean);
142
+ WRITE1(itqt->do_pca);
143
+ write_VectorTransform(&itqt->itq, f);
144
+ write_VectorTransform(&itqt->pca_then_itq, f);
141
145
  } else {
142
- FAISS_THROW_MSG ("cannot serialize this");
146
+ FAISS_THROW_MSG("cannot serialize this");
143
147
  }
144
148
  // common fields
145
- WRITE1 (vt->d_in);
146
- WRITE1 (vt->d_out);
147
- WRITE1 (vt->is_trained);
149
+ WRITE1(vt->d_in);
150
+ WRITE1(vt->d_out);
151
+ WRITE1(vt->is_trained);
148
152
  }
149
153
 
150
- void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) {
151
- WRITE1 (pq->d);
152
- WRITE1 (pq->M);
153
- WRITE1 (pq->nbits);
154
- WRITEVECTOR (pq->centroids);
154
+ void write_ProductQuantizer(const ProductQuantizer* pq, IOWriter* f) {
155
+ WRITE1(pq->d);
156
+ WRITE1(pq->M);
157
+ WRITE1(pq->nbits);
158
+ WRITEVECTOR(pq->centroids);
155
159
  }
156
160
 
157
- static void write_ScalarQuantizer (
158
- const ScalarQuantizer *ivsc, IOWriter *f) {
159
- WRITE1 (ivsc->qtype);
160
- WRITE1 (ivsc->rangestat);
161
- WRITE1 (ivsc->rangestat_arg);
162
- WRITE1 (ivsc->d);
163
- WRITE1 (ivsc->code_size);
164
- WRITEVECTOR (ivsc->trained);
161
+ void write_ResidualQuantizer(const ResidualQuantizer* rq, IOWriter* f) {
162
+ WRITE1(rq->d);
163
+ WRITE1(rq->M);
164
+ WRITEVECTOR(rq->nbits);
165
+ WRITE1(rq->is_trained);
166
+ WRITE1(rq->train_type);
167
+ WRITE1(rq->max_beam_size);
168
+ WRITEVECTOR(rq->codebooks);
165
169
  }
166
170
 
167
- void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
171
+ static void write_ScalarQuantizer(const ScalarQuantizer* ivsc, IOWriter* f) {
172
+ WRITE1(ivsc->qtype);
173
+ WRITE1(ivsc->rangestat);
174
+ WRITE1(ivsc->rangestat_arg);
175
+ WRITE1(ivsc->d);
176
+ WRITE1(ivsc->code_size);
177
+ WRITEVECTOR(ivsc->trained);
178
+ }
179
+
180
+ void write_InvertedLists(const InvertedLists* ils, IOWriter* f) {
168
181
  if (ils == nullptr) {
169
- uint32_t h = fourcc ("il00");
170
- WRITE1 (h);
171
- } else if (const auto & ails =
172
- dynamic_cast<const ArrayInvertedLists *>(ils)) {
173
- uint32_t h = fourcc ("ilar");
174
- WRITE1 (h);
175
- WRITE1 (ails->nlist);
176
- WRITE1 (ails->code_size);
182
+ uint32_t h = fourcc("il00");
183
+ WRITE1(h);
184
+ } else if (
185
+ const auto& ails = dynamic_cast<const ArrayInvertedLists*>(ils)) {
186
+ uint32_t h = fourcc("ilar");
187
+ WRITE1(h);
188
+ WRITE1(ails->nlist);
189
+ WRITE1(ails->code_size);
177
190
  // here we store either as a full or a sparse data buffer
178
191
  size_t n_non0 = 0;
179
192
  for (size_t i = 0; i < ails->nlist; i++) {
@@ -182,329 +195,401 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
182
195
  }
183
196
  if (n_non0 > ails->nlist / 2) {
184
197
  uint32_t list_type = fourcc("full");
185
- WRITE1 (list_type);
198
+ WRITE1(list_type);
186
199
  std::vector<size_t> sizes;
187
200
  for (size_t i = 0; i < ails->nlist; i++) {
188
- sizes.push_back (ails->ids[i].size());
201
+ sizes.push_back(ails->ids[i].size());
189
202
  }
190
- WRITEVECTOR (sizes);
203
+ WRITEVECTOR(sizes);
191
204
  } else {
192
205
  int list_type = fourcc("sprs"); // sparse
193
- WRITE1 (list_type);
206
+ WRITE1(list_type);
194
207
  std::vector<size_t> sizes;
195
208
  for (size_t i = 0; i < ails->nlist; i++) {
196
209
  size_t n = ails->ids[i].size();
197
210
  if (n > 0) {
198
- sizes.push_back (i);
199
- sizes.push_back (n);
211
+ sizes.push_back(i);
212
+ sizes.push_back(n);
200
213
  }
201
214
  }
202
- WRITEVECTOR (sizes);
215
+ WRITEVECTOR(sizes);
203
216
  }
204
217
  // make a single contiguous data buffer (useful for mmapping)
205
218
  for (size_t i = 0; i < ails->nlist; i++) {
206
219
  size_t n = ails->ids[i].size();
207
220
  if (n > 0) {
208
- WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size);
209
- WRITEANDCHECK (ails->ids[i].data(), n);
221
+ WRITEANDCHECK(ails->codes[i].data(), n * ails->code_size);
222
+ WRITEANDCHECK(ails->ids[i].data(), n);
210
223
  }
211
224
  }
212
225
 
213
226
  } else {
214
- InvertedListsIOHook::lookup_classname(
215
- typeid(*ils).name())->write(ils, f);
227
+ InvertedListsIOHook::lookup_classname(typeid(*ils).name())
228
+ ->write(ils, f);
216
229
  }
217
230
  }
218
231
 
219
-
220
- void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
232
+ void write_ProductQuantizer(const ProductQuantizer* pq, const char* fname) {
221
233
  FileIOWriter writer(fname);
222
- write_ProductQuantizer (pq, &writer);
234
+ write_ProductQuantizer(pq, &writer);
223
235
  }
224
236
 
225
- static void write_HNSW (const HNSW *hnsw, IOWriter *f) {
237
+ static void write_HNSW(const HNSW* hnsw, IOWriter* f) {
238
+ WRITEVECTOR(hnsw->assign_probas);
239
+ WRITEVECTOR(hnsw->cum_nneighbor_per_level);
240
+ WRITEVECTOR(hnsw->levels);
241
+ WRITEVECTOR(hnsw->offsets);
242
+ WRITEVECTOR(hnsw->neighbors);
243
+
244
+ WRITE1(hnsw->entry_point);
245
+ WRITE1(hnsw->max_level);
246
+ WRITE1(hnsw->efConstruction);
247
+ WRITE1(hnsw->efSearch);
248
+ WRITE1(hnsw->upper_beam);
249
+ }
226
250
 
227
- WRITEVECTOR (hnsw->assign_probas);
228
- WRITEVECTOR (hnsw->cum_nneighbor_per_level);
229
- WRITEVECTOR (hnsw->levels);
230
- WRITEVECTOR (hnsw->offsets);
231
- WRITEVECTOR (hnsw->neighbors);
251
+ static void write_NSG(const NSG* nsg, IOWriter* f) {
252
+ WRITE1(nsg->ntotal);
253
+ WRITE1(nsg->R);
254
+ WRITE1(nsg->L);
255
+ WRITE1(nsg->C);
256
+ WRITE1(nsg->search_L);
257
+ WRITE1(nsg->enterpoint);
258
+ WRITE1(nsg->is_built);
259
+
260
+ if (!nsg->is_built) {
261
+ return;
262
+ }
232
263
 
233
- WRITE1 (hnsw->entry_point);
234
- WRITE1 (hnsw->max_level);
235
- WRITE1 (hnsw->efConstruction);
236
- WRITE1 (hnsw->efSearch);
237
- WRITE1 (hnsw->upper_beam);
264
+ constexpr int EMPTY_ID = -1;
265
+ auto& graph = nsg->final_graph;
266
+ int K = graph->K;
267
+ int N = graph->N;
268
+ FAISS_THROW_IF_NOT(N == nsg->ntotal);
269
+ FAISS_THROW_IF_NOT(K == nsg->R);
270
+ FAISS_THROW_IF_NOT(true == graph->own_fields);
271
+
272
+ int size = 0;
273
+ for (int i = 0; i < N; i++) {
274
+ for (int j = 0; j < K; j++) {
275
+ int id = graph->at(i, j);
276
+ if (id != EMPTY_ID) {
277
+ WRITE1(id);
278
+ size += 1;
279
+ } else {
280
+ break;
281
+ }
282
+ }
283
+ WRITE1(EMPTY_ID);
284
+ }
238
285
  }
239
286
 
240
- static void write_direct_map (const DirectMap *dm, IOWriter *f) {
241
- char maintain_direct_map = (char)dm->type; // for backwards compatibility with bool
242
- WRITE1 (maintain_direct_map);
243
- WRITEVECTOR (dm->array);
287
+ static void write_direct_map(const DirectMap* dm, IOWriter* f) {
288
+ char maintain_direct_map =
289
+ (char)dm->type; // for backwards compatibility with bool
290
+ WRITE1(maintain_direct_map);
291
+ WRITEVECTOR(dm->array);
244
292
  if (dm->type == DirectMap::Hashtable) {
245
293
  using idx_t = Index::idx_t;
246
294
  std::vector<std::pair<idx_t, idx_t>> v;
247
- const std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
248
- v.resize (map.size());
295
+ const std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
296
+ v.resize(map.size());
249
297
  std::copy(map.begin(), map.end(), v.begin());
250
- WRITEVECTOR (v);
298
+ WRITEVECTOR(v);
251
299
  }
252
300
  }
253
301
 
254
- static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) {
255
- write_index_header (ivf, f);
256
- WRITE1 (ivf->nlist);
257
- WRITE1 (ivf->nprobe);
258
- write_index (ivf->quantizer, f);
259
- write_direct_map (&ivf->direct_map, f);
302
+ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) {
303
+ write_index_header(ivf, f);
304
+ WRITE1(ivf->nlist);
305
+ WRITE1(ivf->nprobe);
306
+ write_index(ivf->quantizer, f);
307
+ write_direct_map(&ivf->direct_map, f);
260
308
  }
261
309
 
262
- void write_index (const Index *idx, IOWriter *f) {
263
- if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
264
- uint32_t h = fourcc (
265
- idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
266
- idxf->metric_type == METRIC_L2 ? "IxF2" : "IxFl");
267
- WRITE1 (h);
268
- write_index_header (idx, f);
269
- WRITEVECTOR (idxf->xb);
270
- } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
271
- uint32_t h = fourcc ("IxHe");
272
- WRITE1 (h);
273
- write_index_header (idx, f);
274
- WRITE1 (idxl->nbits);
275
- WRITE1 (idxl->rotate_data);
276
- WRITE1 (idxl->train_thresholds);
277
- WRITEVECTOR (idxl->thresholds);
278
- WRITE1 (idxl->bytes_per_vec);
279
- write_VectorTransform (&idxl->rrot, f);
280
- WRITEVECTOR (idxl->codes);
281
- } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
282
- uint32_t h = fourcc ("IxPq");
283
- WRITE1 (h);
284
- write_index_header (idx, f);
285
- write_ProductQuantizer (&idxp->pq, f);
286
- WRITEVECTOR (idxp->codes);
310
+ void write_index(const Index* idx, IOWriter* f) {
311
+ if (const IndexFlat* idxf = dynamic_cast<const IndexFlat*>(idx)) {
312
+ uint32_t h =
313
+ fourcc(idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI"
314
+ : idxf->metric_type == METRIC_L2 ? "IxF2"
315
+ : "IxFl");
316
+ WRITE1(h);
317
+ write_index_header(idx, f);
318
+ WRITEVECTOR(idxf->xb);
319
+ } else if (const IndexLSH* idxl = dynamic_cast<const IndexLSH*>(idx)) {
320
+ uint32_t h = fourcc("IxHe");
321
+ WRITE1(h);
322
+ write_index_header(idx, f);
323
+ WRITE1(idxl->nbits);
324
+ WRITE1(idxl->rotate_data);
325
+ WRITE1(idxl->train_thresholds);
326
+ WRITEVECTOR(idxl->thresholds);
327
+ WRITE1(idxl->bytes_per_vec);
328
+ write_VectorTransform(&idxl->rrot, f);
329
+ WRITEVECTOR(idxl->codes);
330
+ } else if (const IndexPQ* idxp = dynamic_cast<const IndexPQ*>(idx)) {
331
+ uint32_t h = fourcc("IxPq");
332
+ WRITE1(h);
333
+ write_index_header(idx, f);
334
+ write_ProductQuantizer(&idxp->pq, f);
335
+ WRITEVECTOR(idxp->codes);
287
336
  // search params -- maybe not useful to store?
288
- WRITE1 (idxp->search_type);
289
- WRITE1 (idxp->encode_signs);
290
- WRITE1 (idxp->polysemous_ht);
291
- } else if(const Index2Layer * idxp =
292
- dynamic_cast<const Index2Layer *> (idx)) {
293
- uint32_t h = fourcc ("Ix2L");
294
- WRITE1 (h);
295
- write_index_header (idx, f);
296
- write_index (idxp->q1.quantizer, f);
297
- WRITE1 (idxp->q1.nlist);
298
- WRITE1 (idxp->q1.quantizer_trains_alone);
299
- write_ProductQuantizer (&idxp->pq, f);
300
- WRITE1 (idxp->code_size_1);
301
- WRITE1 (idxp->code_size_2);
302
- WRITE1 (idxp->code_size);
303
- WRITEVECTOR (idxp->codes);
304
- } else if(const IndexScalarQuantizer * idxs =
305
- dynamic_cast<const IndexScalarQuantizer *> (idx)) {
306
- uint32_t h = fourcc ("IxSQ");
307
- WRITE1 (h);
308
- write_index_header (idx, f);
309
- write_ScalarQuantizer (&idxs->sq, f);
310
- WRITEVECTOR (idxs->codes);
311
- } else if(const IndexLattice * idxl =
312
- dynamic_cast<const IndexLattice *> (idx)) {
313
- uint32_t h = fourcc ("IxLa");
314
- WRITE1 (h);
315
- WRITE1 (idxl->d);
316
- WRITE1 (idxl->nsq);
317
- WRITE1 (idxl->scale_nbit);
318
- WRITE1 (idxl->zn_sphere_codec.r2);
319
- write_index_header (idx, f);
320
- WRITEVECTOR (idxl->trained);
321
- } else if(const IndexIVFFlatDedup * ivfl =
322
- dynamic_cast<const IndexIVFFlatDedup *> (idx)) {
323
- uint32_t h = fourcc ("IwFd");
324
- WRITE1 (h);
325
- write_ivf_header (ivfl, f);
337
+ WRITE1(idxp->search_type);
338
+ WRITE1(idxp->encode_signs);
339
+ WRITE1(idxp->polysemous_ht);
340
+ } else if (
341
+ const IndexResidual* idxr =
342
+ dynamic_cast<const IndexResidual*>(idx)) {
343
+ uint32_t h = fourcc("IxRQ");
344
+ WRITE1(h);
345
+ write_index_header(idx, f);
346
+ write_ResidualQuantizer(&idxr->rq, f);
347
+ WRITE1(idxr->search_type);
348
+ WRITE1(idxr->norm_min);
349
+ WRITE1(idxr->norm_max);
350
+ WRITE1(idxr->code_size);
351
+ WRITEVECTOR(idxr->codes);
352
+ } else if (
353
+ const ResidualCoarseQuantizer* idxr =
354
+ dynamic_cast<const ResidualCoarseQuantizer*>(idx)) {
355
+ uint32_t h = fourcc("ImRQ");
356
+ WRITE1(h);
357
+ write_index_header(idx, f);
358
+ write_ResidualQuantizer(&idxr->rq, f);
359
+ WRITE1(idxr->beam_factor);
360
+ } else if (
361
+ const Index2Layer* idxp = dynamic_cast<const Index2Layer*>(idx)) {
362
+ uint32_t h = fourcc("Ix2L");
363
+ WRITE1(h);
364
+ write_index_header(idx, f);
365
+ write_index(idxp->q1.quantizer, f);
366
+ WRITE1(idxp->q1.nlist);
367
+ WRITE1(idxp->q1.quantizer_trains_alone);
368
+ write_ProductQuantizer(&idxp->pq, f);
369
+ WRITE1(idxp->code_size_1);
370
+ WRITE1(idxp->code_size_2);
371
+ WRITE1(idxp->code_size);
372
+ WRITEVECTOR(idxp->codes);
373
+ } else if (
374
+ const IndexScalarQuantizer* idxs =
375
+ dynamic_cast<const IndexScalarQuantizer*>(idx)) {
376
+ uint32_t h = fourcc("IxSQ");
377
+ WRITE1(h);
378
+ write_index_header(idx, f);
379
+ write_ScalarQuantizer(&idxs->sq, f);
380
+ WRITEVECTOR(idxs->codes);
381
+ } else if (
382
+ const IndexLattice* idxl = dynamic_cast<const IndexLattice*>(idx)) {
383
+ uint32_t h = fourcc("IxLa");
384
+ WRITE1(h);
385
+ WRITE1(idxl->d);
386
+ WRITE1(idxl->nsq);
387
+ WRITE1(idxl->scale_nbit);
388
+ WRITE1(idxl->zn_sphere_codec.r2);
389
+ write_index_header(idx, f);
390
+ WRITEVECTOR(idxl->trained);
391
+ } else if (
392
+ const IndexIVFFlatDedup* ivfl =
393
+ dynamic_cast<const IndexIVFFlatDedup*>(idx)) {
394
+ uint32_t h = fourcc("IwFd");
395
+ WRITE1(h);
396
+ write_ivf_header(ivfl, f);
326
397
  {
327
- std::vector<Index::idx_t> tab (2 * ivfl->instances.size());
398
+ std::vector<Index::idx_t> tab(2 * ivfl->instances.size());
328
399
  long i = 0;
329
- for (auto it = ivfl->instances.begin();
330
- it != ivfl->instances.end(); ++it) {
400
+ for (auto it = ivfl->instances.begin(); it != ivfl->instances.end();
401
+ ++it) {
331
402
  tab[i++] = it->first;
332
403
  tab[i++] = it->second;
333
404
  }
334
- WRITEVECTOR (tab);
405
+ WRITEVECTOR(tab);
335
406
  }
336
- write_InvertedLists (ivfl->invlists, f);
337
- } else if(const IndexIVFFlat * ivfl =
338
- dynamic_cast<const IndexIVFFlat *> (idx)) {
339
- uint32_t h = fourcc ("IwFl");
340
- WRITE1 (h);
341
- write_ivf_header (ivfl, f);
342
- write_InvertedLists (ivfl->invlists, f);
343
- } else if(const IndexIVFScalarQuantizer * ivsc =
344
- dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
345
- uint32_t h = fourcc ("IwSq");
346
- WRITE1 (h);
347
- write_ivf_header (ivsc, f);
348
- write_ScalarQuantizer (&ivsc->sq, f);
349
- WRITE1 (ivsc->code_size);
350
- WRITE1 (ivsc->by_residual);
351
- write_InvertedLists (ivsc->invlists, f);
352
- } else if(const IndexIVFSpectralHash *ivsp =
353
- dynamic_cast<const IndexIVFSpectralHash *>(idx)) {
354
- uint32_t h = fourcc ("IwSh");
355
- WRITE1 (h);
356
- write_ivf_header (ivsp, f);
357
- write_VectorTransform (ivsp->vt, f);
358
- WRITE1 (ivsp->nbit);
359
- WRITE1 (ivsp->period);
360
- WRITE1 (ivsp->threshold_type);
361
- WRITEVECTOR (ivsp->trained);
362
- write_InvertedLists (ivsp->invlists, f);
363
- } else if(const IndexIVFPQ * ivpq =
364
- dynamic_cast<const IndexIVFPQ *> (idx)) {
365
- const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
366
-
367
- uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ");
368
- WRITE1 (h);
369
- write_ivf_header (ivpq, f);
370
- WRITE1 (ivpq->by_residual);
371
- WRITE1 (ivpq->code_size);
372
- write_ProductQuantizer (&ivpq->pq, f);
373
- write_InvertedLists (ivpq->invlists, f);
407
+ write_InvertedLists(ivfl->invlists, f);
408
+ } else if (
409
+ const IndexIVFFlat* ivfl = dynamic_cast<const IndexIVFFlat*>(idx)) {
410
+ uint32_t h = fourcc("IwFl");
411
+ WRITE1(h);
412
+ write_ivf_header(ivfl, f);
413
+ write_InvertedLists(ivfl->invlists, f);
414
+ } else if (
415
+ const IndexIVFScalarQuantizer* ivsc =
416
+ dynamic_cast<const IndexIVFScalarQuantizer*>(idx)) {
417
+ uint32_t h = fourcc("IwSq");
418
+ WRITE1(h);
419
+ write_ivf_header(ivsc, f);
420
+ write_ScalarQuantizer(&ivsc->sq, f);
421
+ WRITE1(ivsc->code_size);
422
+ WRITE1(ivsc->by_residual);
423
+ write_InvertedLists(ivsc->invlists, f);
424
+ } else if (
425
+ const IndexIVFSpectralHash* ivsp =
426
+ dynamic_cast<const IndexIVFSpectralHash*>(idx)) {
427
+ uint32_t h = fourcc("IwSh");
428
+ WRITE1(h);
429
+ write_ivf_header(ivsp, f);
430
+ write_VectorTransform(ivsp->vt, f);
431
+ WRITE1(ivsp->nbit);
432
+ WRITE1(ivsp->period);
433
+ WRITE1(ivsp->threshold_type);
434
+ WRITEVECTOR(ivsp->trained);
435
+ write_InvertedLists(ivsp->invlists, f);
436
+ } else if (const IndexIVFPQ* ivpq = dynamic_cast<const IndexIVFPQ*>(idx)) {
437
+ const IndexIVFPQR* ivfpqr = dynamic_cast<const IndexIVFPQR*>(idx);
438
+
439
+ uint32_t h = fourcc(ivfpqr ? "IwQR" : "IwPQ");
440
+ WRITE1(h);
441
+ write_ivf_header(ivpq, f);
442
+ WRITE1(ivpq->by_residual);
443
+ WRITE1(ivpq->code_size);
444
+ write_ProductQuantizer(&ivpq->pq, f);
445
+ write_InvertedLists(ivpq->invlists, f);
374
446
  if (ivfpqr) {
375
- write_ProductQuantizer (&ivfpqr->refine_pq, f);
376
- WRITEVECTOR (ivfpqr->refine_codes);
377
- WRITE1 (ivfpqr->k_factor);
447
+ write_ProductQuantizer(&ivfpqr->refine_pq, f);
448
+ WRITEVECTOR(ivfpqr->refine_codes);
449
+ WRITE1(ivfpqr->k_factor);
378
450
  }
379
451
 
380
- } else if(const IndexPreTransform * ixpt =
381
- dynamic_cast<const IndexPreTransform *> (idx)) {
382
- uint32_t h = fourcc ("IxPT");
383
- WRITE1 (h);
384
- write_index_header (ixpt, f);
452
+ } else if (
453
+ const IndexPreTransform* ixpt =
454
+ dynamic_cast<const IndexPreTransform*>(idx)) {
455
+ uint32_t h = fourcc("IxPT");
456
+ WRITE1(h);
457
+ write_index_header(ixpt, f);
385
458
  int nt = ixpt->chain.size();
386
- WRITE1 (nt);
459
+ WRITE1(nt);
387
460
  for (int i = 0; i < nt; i++)
388
- write_VectorTransform (ixpt->chain[i], f);
389
- write_index (ixpt->index, f);
390
- } else if(const MultiIndexQuantizer * imiq =
391
- dynamic_cast<const MultiIndexQuantizer *> (idx)) {
392
- uint32_t h = fourcc ("Imiq");
393
- WRITE1 (h);
394
- write_index_header (imiq, f);
395
- write_ProductQuantizer (&imiq->pq, f);
396
- } else if(const IndexRefine * idxrf =
397
- dynamic_cast<const IndexRefine *> (idx)) {
398
- uint32_t h = fourcc ("IxRF");
399
- WRITE1 (h);
400
- write_index_header (idxrf, f);
401
- write_index (idxrf->base_index, f);
402
- write_index (idxrf->refine_index, f);
403
- WRITE1 (idxrf->k_factor);
404
- } else if(const IndexIDMap * idxmap =
405
- dynamic_cast<const IndexIDMap *> (idx)) {
406
- uint32_t h =
407
- dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
408
- fourcc ("IxMp");
461
+ write_VectorTransform(ixpt->chain[i], f);
462
+ write_index(ixpt->index, f);
463
+ } else if (
464
+ const MultiIndexQuantizer* imiq =
465
+ dynamic_cast<const MultiIndexQuantizer*>(idx)) {
466
+ uint32_t h = fourcc("Imiq");
467
+ WRITE1(h);
468
+ write_index_header(imiq, f);
469
+ write_ProductQuantizer(&imiq->pq, f);
470
+ } else if (
471
+ const IndexRefine* idxrf = dynamic_cast<const IndexRefine*>(idx)) {
472
+ uint32_t h = fourcc("IxRF");
473
+ WRITE1(h);
474
+ write_index_header(idxrf, f);
475
+ write_index(idxrf->base_index, f);
476
+ write_index(idxrf->refine_index, f);
477
+ WRITE1(idxrf->k_factor);
478
+ } else if (
479
+ const IndexIDMap* idxmap = dynamic_cast<const IndexIDMap*>(idx)) {
480
+ uint32_t h = dynamic_cast<const IndexIDMap2*>(idx) ? fourcc("IxM2")
481
+ : fourcc("IxMp");
409
482
  // no need to store additional info for IndexIDMap2
410
- WRITE1 (h);
411
- write_index_header (idxmap, f);
412
- write_index (idxmap->index, f);
413
- WRITEVECTOR (idxmap->id_map);
414
- } else if(const IndexHNSW * idxhnsw =
415
- dynamic_cast<const IndexHNSW *> (idx)) {
483
+ WRITE1(h);
484
+ write_index_header(idxmap, f);
485
+ write_index(idxmap->index, f);
486
+ WRITEVECTOR(idxmap->id_map);
487
+ } else if (const IndexHNSW* idxhnsw = dynamic_cast<const IndexHNSW*>(idx)) {
488
+ uint32_t h = dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf")
489
+ : dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp")
490
+ : dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs")
491
+ : dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2")
492
+ : 0;
493
+ FAISS_THROW_IF_NOT(h != 0);
494
+ WRITE1(h);
495
+ write_index_header(idxhnsw, f);
496
+ write_HNSW(&idxhnsw->hnsw, f);
497
+ write_index(idxhnsw->storage, f);
498
+ } else if (const IndexNSG* idxnsg = dynamic_cast<const IndexNSG*>(idx)) {
416
499
  uint32_t h =
417
- dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf") :
418
- dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp") :
419
- dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs") :
420
- dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2") :
421
- 0;
422
- FAISS_THROW_IF_NOT (h != 0);
423
- WRITE1 (h);
424
- write_index_header (idxhnsw, f);
425
- write_HNSW (&idxhnsw->hnsw, f);
426
- write_index (idxhnsw->storage, f);
427
- } else if (const IndexPQFastScan *idxpqfs =
428
- dynamic_cast<const IndexPQFastScan*>(idx)) {
500
+ dynamic_cast<const IndexNSGFlat*>(idx) ? fourcc("INSf") : 0;
501
+ FAISS_THROW_IF_NOT(h != 0);
502
+ WRITE1(h);
503
+ write_index_header(idxnsg, f);
504
+ WRITE1(idxnsg->GK);
505
+ WRITE1(idxnsg->build_type);
506
+ WRITE1(idxnsg->nndescent_S);
507
+ WRITE1(idxnsg->nndescent_R);
508
+ WRITE1(idxnsg->nndescent_L);
509
+ WRITE1(idxnsg->nndescent_iter);
510
+ write_NSG(&idxnsg->nsg, f);
511
+ write_index(idxnsg->storage, f);
512
+ } else if (
513
+ const IndexPQFastScan* idxpqfs =
514
+ dynamic_cast<const IndexPQFastScan*>(idx)) {
429
515
  uint32_t h = fourcc("IPfs");
430
- WRITE1 (h);
431
- write_index_header (idxpqfs, f);
432
- write_ProductQuantizer (&idxpqfs->pq, f);
433
- WRITE1 (idxpqfs->implem);
434
- WRITE1 (idxpqfs->bbs);
435
- WRITE1 (idxpqfs->qbs);
436
- WRITE1 (idxpqfs->ntotal2);
437
- WRITE1 (idxpqfs->M2);
438
- WRITEVECTOR (idxpqfs->codes);
439
- } else if (const IndexIVFPQFastScan * ivpq =
440
- dynamic_cast<const IndexIVFPQFastScan *> (idx)) {
441
- uint32_t h = fourcc ("IwPf");
442
- WRITE1 (h);
443
- write_ivf_header (ivpq, f);
444
- WRITE1 (ivpq->by_residual);
445
- WRITE1 (ivpq->code_size);
446
- WRITE1 (ivpq->bbs);
447
- WRITE1 (ivpq->M2);
448
- WRITE1 (ivpq->implem);
449
- WRITE1 (ivpq->qbs2);
450
- write_ProductQuantizer (&ivpq->pq, f);
451
- write_InvertedLists (ivpq->invlists, f);
516
+ WRITE1(h);
517
+ write_index_header(idxpqfs, f);
518
+ write_ProductQuantizer(&idxpqfs->pq, f);
519
+ WRITE1(idxpqfs->implem);
520
+ WRITE1(idxpqfs->bbs);
521
+ WRITE1(idxpqfs->qbs);
522
+ WRITE1(idxpqfs->ntotal2);
523
+ WRITE1(idxpqfs->M2);
524
+ WRITEVECTOR(idxpqfs->codes);
525
+ } else if (
526
+ const IndexIVFPQFastScan* ivpq =
527
+ dynamic_cast<const IndexIVFPQFastScan*>(idx)) {
528
+ uint32_t h = fourcc("IwPf");
529
+ WRITE1(h);
530
+ write_ivf_header(ivpq, f);
531
+ WRITE1(ivpq->by_residual);
532
+ WRITE1(ivpq->code_size);
533
+ WRITE1(ivpq->bbs);
534
+ WRITE1(ivpq->M2);
535
+ WRITE1(ivpq->implem);
536
+ WRITE1(ivpq->qbs2);
537
+ write_ProductQuantizer(&ivpq->pq, f);
538
+ write_InvertedLists(ivpq->invlists, f);
452
539
  } else {
453
- FAISS_THROW_MSG ("don't know how to serialize this type of index");
540
+ FAISS_THROW_MSG("don't know how to serialize this type of index");
454
541
  }
455
542
  }
456
543
 
457
- void write_index (const Index *idx, FILE *f) {
544
+ void write_index(const Index* idx, FILE* f) {
458
545
  FileIOWriter writer(f);
459
- write_index (idx, &writer);
546
+ write_index(idx, &writer);
460
547
  }
461
548
 
462
- void write_index (const Index *idx, const char *fname) {
549
+ void write_index(const Index* idx, const char* fname) {
463
550
  FileIOWriter writer(fname);
464
- write_index (idx, &writer);
551
+ write_index(idx, &writer);
465
552
  }
466
553
 
467
- void write_VectorTransform (const VectorTransform *vt, const char *fname) {
554
+ void write_VectorTransform(const VectorTransform* vt, const char* fname) {
468
555
  FileIOWriter writer(fname);
469
- write_VectorTransform (vt, &writer);
556
+ write_VectorTransform(vt, &writer);
470
557
  }
471
558
 
472
-
473
559
  /*************************************************************
474
560
  * Write binary indexes
475
561
  **************************************************************/
476
562
 
477
-
478
- static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) {
479
- WRITE1 (idx->d);
480
- WRITE1 (idx->code_size);
481
- WRITE1 (idx->ntotal);
482
- WRITE1 (idx->is_trained);
483
- WRITE1 (idx->metric_type);
563
+ static void write_index_binary_header(const IndexBinary* idx, IOWriter* f) {
564
+ WRITE1(idx->d);
565
+ WRITE1(idx->code_size);
566
+ WRITE1(idx->ntotal);
567
+ WRITE1(idx->is_trained);
568
+ WRITE1(idx->metric_type);
484
569
  }
485
570
 
486
- static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) {
487
- write_index_binary_header (ivf, f);
488
- WRITE1 (ivf->nlist);
489
- WRITE1 (ivf->nprobe);
490
- write_index_binary (ivf->quantizer, f);
491
- write_direct_map (&ivf->direct_map, f);
571
+ static void write_binary_ivf_header(const IndexBinaryIVF* ivf, IOWriter* f) {
572
+ write_index_binary_header(ivf, f);
573
+ WRITE1(ivf->nlist);
574
+ WRITE1(ivf->nprobe);
575
+ write_index_binary(ivf->quantizer, f);
576
+ write_direct_map(&ivf->direct_map, f);
492
577
  }
493
578
 
494
- static void write_binary_hash_invlists (
495
- const IndexBinaryHash::InvertedListMap &invlists,
496
- int b, IOWriter *f)
497
- {
579
+ static void write_binary_hash_invlists(
580
+ const IndexBinaryHash::InvertedListMap& invlists,
581
+ int b,
582
+ IOWriter* f) {
498
583
  size_t sz = invlists.size();
499
- WRITE1 (sz);
584
+ WRITE1(sz);
500
585
  size_t maxil = 0;
501
586
  for (auto it = invlists.begin(); it != invlists.end(); ++it) {
502
- if(it->second.ids.size() > maxil) {
587
+ if (it->second.ids.size() > maxil) {
503
588
  maxil = it->second.ids.size();
504
589
  }
505
590
  }
506
591
  int il_nbit = 0;
507
- while(maxil >= ((uint64_t)1 << il_nbit)) {
592
+ while (maxil >= ((uint64_t)1 << il_nbit)) {
508
593
  il_nbit++;
509
594
  }
510
595
  WRITE1(il_nbit);
@@ -513,25 +598,25 @@ static void write_binary_hash_invlists (
513
598
  // memmap it at some point
514
599
 
515
600
  // buffer for bitstrings
516
- std::vector<uint8_t> buf (((b + il_nbit) * sz + 7) / 8);
517
- BitstringWriter wr (buf.data(), buf.size());
601
+ std::vector<uint8_t> buf(((b + il_nbit) * sz + 7) / 8);
602
+ BitstringWriter wr(buf.data(), buf.size());
518
603
  for (auto it = invlists.begin(); it != invlists.end(); ++it) {
519
- wr.write (it->first, b);
520
- wr.write (it->second.ids.size(), il_nbit);
604
+ wr.write(it->first, b);
605
+ wr.write(it->second.ids.size(), il_nbit);
521
606
  }
522
- WRITEVECTOR (buf);
607
+ WRITEVECTOR(buf);
523
608
 
524
609
  for (auto it = invlists.begin(); it != invlists.end(); ++it) {
525
- WRITEVECTOR (it->second.ids);
526
- WRITEVECTOR (it->second.vecs);
610
+ WRITEVECTOR(it->second.ids);
611
+ WRITEVECTOR(it->second.vecs);
527
612
  }
528
613
  }
529
614
 
530
615
  static void write_binary_multi_hash_map(
531
- const IndexBinaryMultiHash::Map &map,
532
- int b, size_t ntotal,
533
- IOWriter *f)
534
- {
616
+ const IndexBinaryMultiHash::Map& map,
617
+ int b,
618
+ size_t ntotal,
619
+ IOWriter* f) {
535
620
  int id_bits = 0;
536
621
  while ((ntotal > ((Index::idx_t)1 << id_bits))) {
537
622
  id_bits++;
@@ -541,7 +626,7 @@ static void write_binary_multi_hash_map(
541
626
  WRITE1(sz);
542
627
  size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
543
628
  std::vector<uint8_t> buf((nbit + 7) / 8);
544
- BitstringWriter wr (buf.data(), buf.size());
629
+ BitstringWriter wr(buf.data(), buf.size());
545
630
  for (auto it = map.begin(); it != map.end(); ++it) {
546
631
  wr.write(it->first, b);
547
632
  wr.write(it->second.size(), id_bits);
@@ -549,80 +634,85 @@ static void write_binary_multi_hash_map(
549
634
  wr.write(id, id_bits);
550
635
  }
551
636
  }
552
- WRITEVECTOR (buf);
637
+ WRITEVECTOR(buf);
553
638
  }
554
639
 
555
- void write_index_binary (const IndexBinary *idx, IOWriter *f) {
556
- if (const IndexBinaryFlat *idxf =
557
- dynamic_cast<const IndexBinaryFlat *> (idx)) {
558
- uint32_t h = fourcc ("IBxF");
559
- WRITE1 (h);
560
- write_index_binary_header (idx, f);
561
- WRITEVECTOR (idxf->xb);
562
- } else if (const IndexBinaryIVF *ivf =
563
- dynamic_cast<const IndexBinaryIVF *> (idx)) {
564
- uint32_t h = fourcc ("IBwF");
565
- WRITE1 (h);
566
- write_binary_ivf_header (ivf, f);
567
- write_InvertedLists (ivf->invlists, f);
568
- } else if(const IndexBinaryFromFloat * idxff =
569
- dynamic_cast<const IndexBinaryFromFloat *> (idx)) {
570
- uint32_t h = fourcc ("IBFf");
571
- WRITE1 (h);
572
- write_index_binary_header (idxff, f);
573
- write_index (idxff->index, f);
574
- } else if (const IndexBinaryHNSW *idxhnsw =
575
- dynamic_cast<const IndexBinaryHNSW *> (idx)) {
576
- uint32_t h = fourcc ("IBHf");
577
- WRITE1 (h);
578
- write_index_binary_header (idxhnsw, f);
579
- write_HNSW (&idxhnsw->hnsw, f);
580
- write_index_binary (idxhnsw->storage, f);
581
- } else if(const IndexBinaryIDMap * idxmap =
582
- dynamic_cast<const IndexBinaryIDMap *> (idx)) {
583
- uint32_t h =
584
- dynamic_cast<const IndexBinaryIDMap2 *> (idx) ? fourcc ("IBM2") :
585
- fourcc ("IBMp");
640
+ void write_index_binary(const IndexBinary* idx, IOWriter* f) {
641
+ if (const IndexBinaryFlat* idxf =
642
+ dynamic_cast<const IndexBinaryFlat*>(idx)) {
643
+ uint32_t h = fourcc("IBxF");
644
+ WRITE1(h);
645
+ write_index_binary_header(idx, f);
646
+ WRITEVECTOR(idxf->xb);
647
+ } else if (
648
+ const IndexBinaryIVF* ivf =
649
+ dynamic_cast<const IndexBinaryIVF*>(idx)) {
650
+ uint32_t h = fourcc("IBwF");
651
+ WRITE1(h);
652
+ write_binary_ivf_header(ivf, f);
653
+ write_InvertedLists(ivf->invlists, f);
654
+ } else if (
655
+ const IndexBinaryFromFloat* idxff =
656
+ dynamic_cast<const IndexBinaryFromFloat*>(idx)) {
657
+ uint32_t h = fourcc("IBFf");
658
+ WRITE1(h);
659
+ write_index_binary_header(idxff, f);
660
+ write_index(idxff->index, f);
661
+ } else if (
662
+ const IndexBinaryHNSW* idxhnsw =
663
+ dynamic_cast<const IndexBinaryHNSW*>(idx)) {
664
+ uint32_t h = fourcc("IBHf");
665
+ WRITE1(h);
666
+ write_index_binary_header(idxhnsw, f);
667
+ write_HNSW(&idxhnsw->hnsw, f);
668
+ write_index_binary(idxhnsw->storage, f);
669
+ } else if (
670
+ const IndexBinaryIDMap* idxmap =
671
+ dynamic_cast<const IndexBinaryIDMap*>(idx)) {
672
+ uint32_t h = dynamic_cast<const IndexBinaryIDMap2*>(idx)
673
+ ? fourcc("IBM2")
674
+ : fourcc("IBMp");
586
675
  // no need to store additional info for IndexIDMap2
587
- WRITE1 (h);
588
- write_index_binary_header (idxmap, f);
589
- write_index_binary (idxmap->index, f);
590
- WRITEVECTOR (idxmap->id_map);
591
- } else if (const IndexBinaryHash *idxh =
592
- dynamic_cast<const IndexBinaryHash *> (idx)) {
593
- uint32_t h = fourcc ("IBHh");
594
- WRITE1 (h);
595
- write_index_binary_header (idxh, f);
596
- WRITE1 (idxh->b);
597
- WRITE1 (idxh->nflip);
676
+ WRITE1(h);
677
+ write_index_binary_header(idxmap, f);
678
+ write_index_binary(idxmap->index, f);
679
+ WRITEVECTOR(idxmap->id_map);
680
+ } else if (
681
+ const IndexBinaryHash* idxh =
682
+ dynamic_cast<const IndexBinaryHash*>(idx)) {
683
+ uint32_t h = fourcc("IBHh");
684
+ WRITE1(h);
685
+ write_index_binary_header(idxh, f);
686
+ WRITE1(idxh->b);
687
+ WRITE1(idxh->nflip);
598
688
  write_binary_hash_invlists(idxh->invlists, idxh->b, f);
599
- } else if (const IndexBinaryMultiHash *idxmh =
600
- dynamic_cast<const IndexBinaryMultiHash *> (idx)) {
601
- uint32_t h = fourcc ("IBHm");
602
- WRITE1 (h);
603
- write_index_binary_header (idxmh, f);
604
- write_index_binary (idxmh->storage, f);
605
- WRITE1 (idxmh->b);
606
- WRITE1 (idxmh->nhash);
607
- WRITE1 (idxmh->nflip);
689
+ } else if (
690
+ const IndexBinaryMultiHash* idxmh =
691
+ dynamic_cast<const IndexBinaryMultiHash*>(idx)) {
692
+ uint32_t h = fourcc("IBHm");
693
+ WRITE1(h);
694
+ write_index_binary_header(idxmh, f);
695
+ write_index_binary(idxmh->storage, f);
696
+ WRITE1(idxmh->b);
697
+ WRITE1(idxmh->nhash);
698
+ WRITE1(idxmh->nflip);
608
699
  for (int i = 0; i < idxmh->nhash; i++) {
609
700
  write_binary_multi_hash_map(
610
701
  idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
611
702
  }
612
703
  } else {
613
- FAISS_THROW_MSG ("don't know how to serialize this type of index");
704
+ FAISS_THROW_MSG("don't know how to serialize this type of index");
614
705
  }
615
706
  }
616
707
 
617
- void write_index_binary (const IndexBinary *idx, FILE *f) {
708
+ void write_index_binary(const IndexBinary* idx, FILE* f) {
618
709
  FileIOWriter writer(f);
619
710
  write_index_binary(idx, &writer);
620
711
  }
621
712
 
622
- void write_index_binary (const IndexBinary *idx, const char *fname) {
713
+ void write_index_binary(const IndexBinary* idx, const char* fname) {
623
714
  FileIOWriter writer(fname);
624
- write_index_binary (idx, &writer);
715
+ write_index_binary(idx, &writer);
625
716
  }
626
717
 
627
-
628
718
  } // namespace faiss