faiss 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -9,11 +9,14 @@
9
9
 
10
10
  #include <faiss/index_io.h>
11
11
 
12
+ #include <faiss/impl/io.h>
13
+ #include <faiss/impl/io_macros.h>
14
+
12
15
  #include <cstdio>
13
16
  #include <cstdlib>
14
17
 
15
- #include <sys/types.h>
16
18
  #include <sys/stat.h>
19
+ #include <sys/types.h>
17
20
 
18
21
  #include <faiss/invlists/InvertedListsIOHook.h>
19
22
 
@@ -22,30 +25,32 @@
22
25
  #include <faiss/impl/io_macros.h>
23
26
  #include <faiss/utils/hamming.h>
24
27
 
28
+ #include <faiss/Index2Layer.h>
25
29
  #include <faiss/IndexFlat.h>
26
- #include <faiss/VectorTransform.h>
27
- #include <faiss/IndexPreTransform.h>
28
- #include <faiss/IndexLSH.h>
29
- #include <faiss/IndexPQ.h>
30
+ #include <faiss/IndexHNSW.h>
30
31
  #include <faiss/IndexIVF.h>
32
+ #include <faiss/IndexIVFFlat.h>
31
33
  #include <faiss/IndexIVFPQ.h>
34
+ #include <faiss/IndexIVFPQFastScan.h>
32
35
  #include <faiss/IndexIVFPQR.h>
33
- #include <faiss/Index2Layer.h>
34
- #include <faiss/IndexIVFFlat.h>
35
36
  #include <faiss/IndexIVFSpectralHash.h>
36
- #include <faiss/MetaIndexes.h>
37
- #include <faiss/IndexScalarQuantizer.h>
38
- #include <faiss/IndexHNSW.h>
37
+ #include <faiss/IndexLSH.h>
39
38
  #include <faiss/IndexLattice.h>
39
+ #include <faiss/IndexNSG.h>
40
+ #include <faiss/IndexPQ.h>
40
41
  #include <faiss/IndexPQFastScan.h>
41
- #include <faiss/IndexIVFPQFastScan.h>
42
+ #include <faiss/IndexPreTransform.h>
42
43
  #include <faiss/IndexRefine.h>
44
+ #include <faiss/IndexResidual.h>
45
+ #include <faiss/IndexScalarQuantizer.h>
46
+ #include <faiss/MetaIndexes.h>
47
+ #include <faiss/VectorTransform.h>
43
48
 
44
49
  #include <faiss/IndexBinaryFlat.h>
45
50
  #include <faiss/IndexBinaryFromFloat.h>
46
51
  #include <faiss/IndexBinaryHNSW.h>
47
- #include <faiss/IndexBinaryIVF.h>
48
52
  #include <faiss/IndexBinaryHash.h>
53
+ #include <faiss/IndexBinaryIVF.h>
49
54
 
50
55
  /*************************************************************
51
56
  * The I/O format is the content of the class. For objects that are
@@ -68,112 +73,120 @@
68
73
 
69
74
  namespace faiss {
70
75
 
71
-
72
76
  /*************************************************************
73
77
  * Write
74
78
  **************************************************************/
75
- static void write_index_header (const Index *idx, IOWriter *f) {
76
- WRITE1 (idx->d);
77
- WRITE1 (idx->ntotal);
79
+ static void write_index_header(const Index* idx, IOWriter* f) {
80
+ WRITE1(idx->d);
81
+ WRITE1(idx->ntotal);
78
82
  Index::idx_t dummy = 1 << 20;
79
- WRITE1 (dummy);
80
- WRITE1 (dummy);
81
- WRITE1 (idx->is_trained);
82
- WRITE1 (idx->metric_type);
83
+ WRITE1(dummy);
84
+ WRITE1(dummy);
85
+ WRITE1(idx->is_trained);
86
+ WRITE1(idx->metric_type);
83
87
  if (idx->metric_type > 1) {
84
- WRITE1 (idx->metric_arg);
88
+ WRITE1(idx->metric_arg);
85
89
  }
86
90
  }
87
91
 
88
- void write_VectorTransform (const VectorTransform *vt, IOWriter *f) {
89
- if (const LinearTransform * lt =
90
- dynamic_cast < const LinearTransform *> (vt)) {
91
- if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
92
- uint32_t h = fourcc ("rrot");
93
- WRITE1 (h);
94
- } else if (const PCAMatrix * pca =
95
- dynamic_cast<const PCAMatrix *>(lt)) {
96
- uint32_t h = fourcc ("PcAm");
97
- WRITE1 (h);
98
- WRITE1 (pca->eigen_power);
99
- WRITE1 (pca->random_rotation);
100
- WRITE1 (pca->balanced_bins);
101
- WRITEVECTOR (pca->mean);
102
- WRITEVECTOR (pca->eigenvalues);
103
- WRITEVECTOR (pca->PCAMat);
104
- } else if (const ITQMatrix * itqm =
105
- dynamic_cast<const ITQMatrix *>(lt)) {
106
- uint32_t h = fourcc ("Viqm");
107
- WRITE1 (h);
108
- WRITE1 (itqm->max_iter);
109
- WRITE1 (itqm->seed);
92
+ void write_VectorTransform(const VectorTransform* vt, IOWriter* f) {
93
+ if (const LinearTransform* lt = dynamic_cast<const LinearTransform*>(vt)) {
94
+ if (dynamic_cast<const RandomRotationMatrix*>(lt)) {
95
+ uint32_t h = fourcc("rrot");
96
+ WRITE1(h);
97
+ } else if (const PCAMatrix* pca = dynamic_cast<const PCAMatrix*>(lt)) {
98
+ uint32_t h = fourcc("PcAm");
99
+ WRITE1(h);
100
+ WRITE1(pca->eigen_power);
101
+ WRITE1(pca->random_rotation);
102
+ WRITE1(pca->balanced_bins);
103
+ WRITEVECTOR(pca->mean);
104
+ WRITEVECTOR(pca->eigenvalues);
105
+ WRITEVECTOR(pca->PCAMat);
106
+ } else if (const ITQMatrix* itqm = dynamic_cast<const ITQMatrix*>(lt)) {
107
+ uint32_t h = fourcc("Viqm");
108
+ WRITE1(h);
109
+ WRITE1(itqm->max_iter);
110
+ WRITE1(itqm->seed);
110
111
  } else {
111
112
  // generic LinearTransform (includes OPQ)
112
- uint32_t h = fourcc ("LTra");
113
- WRITE1 (h);
113
+ uint32_t h = fourcc("LTra");
114
+ WRITE1(h);
114
115
  }
115
- WRITE1 (lt->have_bias);
116
- WRITEVECTOR (lt->A);
117
- WRITEVECTOR (lt->b);
118
- } else if (const RemapDimensionsTransform *rdt =
119
- dynamic_cast<const RemapDimensionsTransform *>(vt)) {
120
- uint32_t h = fourcc ("RmDT");
121
- WRITE1 (h);
122
- WRITEVECTOR (rdt->map);
123
- } else if (const NormalizationTransform *nt =
124
- dynamic_cast<const NormalizationTransform *>(vt)) {
125
- uint32_t h = fourcc ("VNrm");
126
- WRITE1 (h);
127
- WRITE1 (nt->norm);
128
- } else if (const CenteringTransform *ct =
129
- dynamic_cast<const CenteringTransform *>(vt)) {
130
- uint32_t h = fourcc ("VCnt");
131
- WRITE1 (h);
132
- WRITEVECTOR (ct->mean);
133
- } else if (const ITQTransform *itqt =
134
- dynamic_cast<const ITQTransform*> (vt)) {
135
- uint32_t h = fourcc ("Viqt");
136
- WRITE1 (h);
137
- WRITEVECTOR (itqt->mean);
138
- WRITE1 (itqt->do_pca);
139
- write_VectorTransform (&itqt->itq, f);
140
- write_VectorTransform (&itqt->pca_then_itq, f);
116
+ WRITE1(lt->have_bias);
117
+ WRITEVECTOR(lt->A);
118
+ WRITEVECTOR(lt->b);
119
+ } else if (
120
+ const RemapDimensionsTransform* rdt =
121
+ dynamic_cast<const RemapDimensionsTransform*>(vt)) {
122
+ uint32_t h = fourcc("RmDT");
123
+ WRITE1(h);
124
+ WRITEVECTOR(rdt->map);
125
+ } else if (
126
+ const NormalizationTransform* nt =
127
+ dynamic_cast<const NormalizationTransform*>(vt)) {
128
+ uint32_t h = fourcc("VNrm");
129
+ WRITE1(h);
130
+ WRITE1(nt->norm);
131
+ } else if (
132
+ const CenteringTransform* ct =
133
+ dynamic_cast<const CenteringTransform*>(vt)) {
134
+ uint32_t h = fourcc("VCnt");
135
+ WRITE1(h);
136
+ WRITEVECTOR(ct->mean);
137
+ } else if (
138
+ const ITQTransform* itqt = dynamic_cast<const ITQTransform*>(vt)) {
139
+ uint32_t h = fourcc("Viqt");
140
+ WRITE1(h);
141
+ WRITEVECTOR(itqt->mean);
142
+ WRITE1(itqt->do_pca);
143
+ write_VectorTransform(&itqt->itq, f);
144
+ write_VectorTransform(&itqt->pca_then_itq, f);
141
145
  } else {
142
- FAISS_THROW_MSG ("cannot serialize this");
146
+ FAISS_THROW_MSG("cannot serialize this");
143
147
  }
144
148
  // common fields
145
- WRITE1 (vt->d_in);
146
- WRITE1 (vt->d_out);
147
- WRITE1 (vt->is_trained);
149
+ WRITE1(vt->d_in);
150
+ WRITE1(vt->d_out);
151
+ WRITE1(vt->is_trained);
148
152
  }
149
153
 
150
- void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) {
151
- WRITE1 (pq->d);
152
- WRITE1 (pq->M);
153
- WRITE1 (pq->nbits);
154
- WRITEVECTOR (pq->centroids);
154
+ void write_ProductQuantizer(const ProductQuantizer* pq, IOWriter* f) {
155
+ WRITE1(pq->d);
156
+ WRITE1(pq->M);
157
+ WRITE1(pq->nbits);
158
+ WRITEVECTOR(pq->centroids);
155
159
  }
156
160
 
157
- static void write_ScalarQuantizer (
158
- const ScalarQuantizer *ivsc, IOWriter *f) {
159
- WRITE1 (ivsc->qtype);
160
- WRITE1 (ivsc->rangestat);
161
- WRITE1 (ivsc->rangestat_arg);
162
- WRITE1 (ivsc->d);
163
- WRITE1 (ivsc->code_size);
164
- WRITEVECTOR (ivsc->trained);
161
+ void write_ResidualQuantizer(const ResidualQuantizer* rq, IOWriter* f) {
162
+ WRITE1(rq->d);
163
+ WRITE1(rq->M);
164
+ WRITEVECTOR(rq->nbits);
165
+ WRITE1(rq->is_trained);
166
+ WRITE1(rq->train_type);
167
+ WRITE1(rq->max_beam_size);
168
+ WRITEVECTOR(rq->codebooks);
165
169
  }
166
170
 
167
- void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
171
+ static void write_ScalarQuantizer(const ScalarQuantizer* ivsc, IOWriter* f) {
172
+ WRITE1(ivsc->qtype);
173
+ WRITE1(ivsc->rangestat);
174
+ WRITE1(ivsc->rangestat_arg);
175
+ WRITE1(ivsc->d);
176
+ WRITE1(ivsc->code_size);
177
+ WRITEVECTOR(ivsc->trained);
178
+ }
179
+
180
+ void write_InvertedLists(const InvertedLists* ils, IOWriter* f) {
168
181
  if (ils == nullptr) {
169
- uint32_t h = fourcc ("il00");
170
- WRITE1 (h);
171
- } else if (const auto & ails =
172
- dynamic_cast<const ArrayInvertedLists *>(ils)) {
173
- uint32_t h = fourcc ("ilar");
174
- WRITE1 (h);
175
- WRITE1 (ails->nlist);
176
- WRITE1 (ails->code_size);
182
+ uint32_t h = fourcc("il00");
183
+ WRITE1(h);
184
+ } else if (
185
+ const auto& ails = dynamic_cast<const ArrayInvertedLists*>(ils)) {
186
+ uint32_t h = fourcc("ilar");
187
+ WRITE1(h);
188
+ WRITE1(ails->nlist);
189
+ WRITE1(ails->code_size);
177
190
  // here we store either as a full or a sparse data buffer
178
191
  size_t n_non0 = 0;
179
192
  for (size_t i = 0; i < ails->nlist; i++) {
@@ -182,329 +195,401 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
182
195
  }
183
196
  if (n_non0 > ails->nlist / 2) {
184
197
  uint32_t list_type = fourcc("full");
185
- WRITE1 (list_type);
198
+ WRITE1(list_type);
186
199
  std::vector<size_t> sizes;
187
200
  for (size_t i = 0; i < ails->nlist; i++) {
188
- sizes.push_back (ails->ids[i].size());
201
+ sizes.push_back(ails->ids[i].size());
189
202
  }
190
- WRITEVECTOR (sizes);
203
+ WRITEVECTOR(sizes);
191
204
  } else {
192
205
  int list_type = fourcc("sprs"); // sparse
193
- WRITE1 (list_type);
206
+ WRITE1(list_type);
194
207
  std::vector<size_t> sizes;
195
208
  for (size_t i = 0; i < ails->nlist; i++) {
196
209
  size_t n = ails->ids[i].size();
197
210
  if (n > 0) {
198
- sizes.push_back (i);
199
- sizes.push_back (n);
211
+ sizes.push_back(i);
212
+ sizes.push_back(n);
200
213
  }
201
214
  }
202
- WRITEVECTOR (sizes);
215
+ WRITEVECTOR(sizes);
203
216
  }
204
217
  // make a single contiguous data buffer (useful for mmapping)
205
218
  for (size_t i = 0; i < ails->nlist; i++) {
206
219
  size_t n = ails->ids[i].size();
207
220
  if (n > 0) {
208
- WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size);
209
- WRITEANDCHECK (ails->ids[i].data(), n);
221
+ WRITEANDCHECK(ails->codes[i].data(), n * ails->code_size);
222
+ WRITEANDCHECK(ails->ids[i].data(), n);
210
223
  }
211
224
  }
212
225
 
213
226
  } else {
214
- InvertedListsIOHook::lookup_classname(
215
- typeid(*ils).name())->write(ils, f);
227
+ InvertedListsIOHook::lookup_classname(typeid(*ils).name())
228
+ ->write(ils, f);
216
229
  }
217
230
  }
218
231
 
219
-
220
- void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
232
+ void write_ProductQuantizer(const ProductQuantizer* pq, const char* fname) {
221
233
  FileIOWriter writer(fname);
222
- write_ProductQuantizer (pq, &writer);
234
+ write_ProductQuantizer(pq, &writer);
223
235
  }
224
236
 
225
- static void write_HNSW (const HNSW *hnsw, IOWriter *f) {
237
+ static void write_HNSW(const HNSW* hnsw, IOWriter* f) {
238
+ WRITEVECTOR(hnsw->assign_probas);
239
+ WRITEVECTOR(hnsw->cum_nneighbor_per_level);
240
+ WRITEVECTOR(hnsw->levels);
241
+ WRITEVECTOR(hnsw->offsets);
242
+ WRITEVECTOR(hnsw->neighbors);
243
+
244
+ WRITE1(hnsw->entry_point);
245
+ WRITE1(hnsw->max_level);
246
+ WRITE1(hnsw->efConstruction);
247
+ WRITE1(hnsw->efSearch);
248
+ WRITE1(hnsw->upper_beam);
249
+ }
226
250
 
227
- WRITEVECTOR (hnsw->assign_probas);
228
- WRITEVECTOR (hnsw->cum_nneighbor_per_level);
229
- WRITEVECTOR (hnsw->levels);
230
- WRITEVECTOR (hnsw->offsets);
231
- WRITEVECTOR (hnsw->neighbors);
251
+ static void write_NSG(const NSG* nsg, IOWriter* f) {
252
+ WRITE1(nsg->ntotal);
253
+ WRITE1(nsg->R);
254
+ WRITE1(nsg->L);
255
+ WRITE1(nsg->C);
256
+ WRITE1(nsg->search_L);
257
+ WRITE1(nsg->enterpoint);
258
+ WRITE1(nsg->is_built);
259
+
260
+ if (!nsg->is_built) {
261
+ return;
262
+ }
232
263
 
233
- WRITE1 (hnsw->entry_point);
234
- WRITE1 (hnsw->max_level);
235
- WRITE1 (hnsw->efConstruction);
236
- WRITE1 (hnsw->efSearch);
237
- WRITE1 (hnsw->upper_beam);
264
+ constexpr int EMPTY_ID = -1;
265
+ auto& graph = nsg->final_graph;
266
+ int K = graph->K;
267
+ int N = graph->N;
268
+ FAISS_THROW_IF_NOT(N == nsg->ntotal);
269
+ FAISS_THROW_IF_NOT(K == nsg->R);
270
+ FAISS_THROW_IF_NOT(true == graph->own_fields);
271
+
272
+ int size = 0;
273
+ for (int i = 0; i < N; i++) {
274
+ for (int j = 0; j < K; j++) {
275
+ int id = graph->at(i, j);
276
+ if (id != EMPTY_ID) {
277
+ WRITE1(id);
278
+ size += 1;
279
+ } else {
280
+ break;
281
+ }
282
+ }
283
+ WRITE1(EMPTY_ID);
284
+ }
238
285
  }
239
286
 
240
- static void write_direct_map (const DirectMap *dm, IOWriter *f) {
241
- char maintain_direct_map = (char)dm->type; // for backwards compatibility with bool
242
- WRITE1 (maintain_direct_map);
243
- WRITEVECTOR (dm->array);
287
+ static void write_direct_map(const DirectMap* dm, IOWriter* f) {
288
+ char maintain_direct_map =
289
+ (char)dm->type; // for backwards compatibility with bool
290
+ WRITE1(maintain_direct_map);
291
+ WRITEVECTOR(dm->array);
244
292
  if (dm->type == DirectMap::Hashtable) {
245
293
  using idx_t = Index::idx_t;
246
294
  std::vector<std::pair<idx_t, idx_t>> v;
247
- const std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
248
- v.resize (map.size());
295
+ const std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
296
+ v.resize(map.size());
249
297
  std::copy(map.begin(), map.end(), v.begin());
250
- WRITEVECTOR (v);
298
+ WRITEVECTOR(v);
251
299
  }
252
300
  }
253
301
 
254
- static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) {
255
- write_index_header (ivf, f);
256
- WRITE1 (ivf->nlist);
257
- WRITE1 (ivf->nprobe);
258
- write_index (ivf->quantizer, f);
259
- write_direct_map (&ivf->direct_map, f);
302
+ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) {
303
+ write_index_header(ivf, f);
304
+ WRITE1(ivf->nlist);
305
+ WRITE1(ivf->nprobe);
306
+ write_index(ivf->quantizer, f);
307
+ write_direct_map(&ivf->direct_map, f);
260
308
  }
261
309
 
262
- void write_index (const Index *idx, IOWriter *f) {
263
- if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
264
- uint32_t h = fourcc (
265
- idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
266
- idxf->metric_type == METRIC_L2 ? "IxF2" : "IxFl");
267
- WRITE1 (h);
268
- write_index_header (idx, f);
269
- WRITEVECTOR (idxf->xb);
270
- } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
271
- uint32_t h = fourcc ("IxHe");
272
- WRITE1 (h);
273
- write_index_header (idx, f);
274
- WRITE1 (idxl->nbits);
275
- WRITE1 (idxl->rotate_data);
276
- WRITE1 (idxl->train_thresholds);
277
- WRITEVECTOR (idxl->thresholds);
278
- WRITE1 (idxl->bytes_per_vec);
279
- write_VectorTransform (&idxl->rrot, f);
280
- WRITEVECTOR (idxl->codes);
281
- } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
282
- uint32_t h = fourcc ("IxPq");
283
- WRITE1 (h);
284
- write_index_header (idx, f);
285
- write_ProductQuantizer (&idxp->pq, f);
286
- WRITEVECTOR (idxp->codes);
310
+ void write_index(const Index* idx, IOWriter* f) {
311
+ if (const IndexFlat* idxf = dynamic_cast<const IndexFlat*>(idx)) {
312
+ uint32_t h =
313
+ fourcc(idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI"
314
+ : idxf->metric_type == METRIC_L2 ? "IxF2"
315
+ : "IxFl");
316
+ WRITE1(h);
317
+ write_index_header(idx, f);
318
+ WRITEVECTOR(idxf->xb);
319
+ } else if (const IndexLSH* idxl = dynamic_cast<const IndexLSH*>(idx)) {
320
+ uint32_t h = fourcc("IxHe");
321
+ WRITE1(h);
322
+ write_index_header(idx, f);
323
+ WRITE1(idxl->nbits);
324
+ WRITE1(idxl->rotate_data);
325
+ WRITE1(idxl->train_thresholds);
326
+ WRITEVECTOR(idxl->thresholds);
327
+ WRITE1(idxl->bytes_per_vec);
328
+ write_VectorTransform(&idxl->rrot, f);
329
+ WRITEVECTOR(idxl->codes);
330
+ } else if (const IndexPQ* idxp = dynamic_cast<const IndexPQ*>(idx)) {
331
+ uint32_t h = fourcc("IxPq");
332
+ WRITE1(h);
333
+ write_index_header(idx, f);
334
+ write_ProductQuantizer(&idxp->pq, f);
335
+ WRITEVECTOR(idxp->codes);
287
336
  // search params -- maybe not useful to store?
288
- WRITE1 (idxp->search_type);
289
- WRITE1 (idxp->encode_signs);
290
- WRITE1 (idxp->polysemous_ht);
291
- } else if(const Index2Layer * idxp =
292
- dynamic_cast<const Index2Layer *> (idx)) {
293
- uint32_t h = fourcc ("Ix2L");
294
- WRITE1 (h);
295
- write_index_header (idx, f);
296
- write_index (idxp->q1.quantizer, f);
297
- WRITE1 (idxp->q1.nlist);
298
- WRITE1 (idxp->q1.quantizer_trains_alone);
299
- write_ProductQuantizer (&idxp->pq, f);
300
- WRITE1 (idxp->code_size_1);
301
- WRITE1 (idxp->code_size_2);
302
- WRITE1 (idxp->code_size);
303
- WRITEVECTOR (idxp->codes);
304
- } else if(const IndexScalarQuantizer * idxs =
305
- dynamic_cast<const IndexScalarQuantizer *> (idx)) {
306
- uint32_t h = fourcc ("IxSQ");
307
- WRITE1 (h);
308
- write_index_header (idx, f);
309
- write_ScalarQuantizer (&idxs->sq, f);
310
- WRITEVECTOR (idxs->codes);
311
- } else if(const IndexLattice * idxl =
312
- dynamic_cast<const IndexLattice *> (idx)) {
313
- uint32_t h = fourcc ("IxLa");
314
- WRITE1 (h);
315
- WRITE1 (idxl->d);
316
- WRITE1 (idxl->nsq);
317
- WRITE1 (idxl->scale_nbit);
318
- WRITE1 (idxl->zn_sphere_codec.r2);
319
- write_index_header (idx, f);
320
- WRITEVECTOR (idxl->trained);
321
- } else if(const IndexIVFFlatDedup * ivfl =
322
- dynamic_cast<const IndexIVFFlatDedup *> (idx)) {
323
- uint32_t h = fourcc ("IwFd");
324
- WRITE1 (h);
325
- write_ivf_header (ivfl, f);
337
+ WRITE1(idxp->search_type);
338
+ WRITE1(idxp->encode_signs);
339
+ WRITE1(idxp->polysemous_ht);
340
+ } else if (
341
+ const IndexResidual* idxr =
342
+ dynamic_cast<const IndexResidual*>(idx)) {
343
+ uint32_t h = fourcc("IxRQ");
344
+ WRITE1(h);
345
+ write_index_header(idx, f);
346
+ write_ResidualQuantizer(&idxr->rq, f);
347
+ WRITE1(idxr->search_type);
348
+ WRITE1(idxr->norm_min);
349
+ WRITE1(idxr->norm_max);
350
+ WRITE1(idxr->code_size);
351
+ WRITEVECTOR(idxr->codes);
352
+ } else if (
353
+ const ResidualCoarseQuantizer* idxr =
354
+ dynamic_cast<const ResidualCoarseQuantizer*>(idx)) {
355
+ uint32_t h = fourcc("ImRQ");
356
+ WRITE1(h);
357
+ write_index_header(idx, f);
358
+ write_ResidualQuantizer(&idxr->rq, f);
359
+ WRITE1(idxr->beam_factor);
360
+ } else if (
361
+ const Index2Layer* idxp = dynamic_cast<const Index2Layer*>(idx)) {
362
+ uint32_t h = fourcc("Ix2L");
363
+ WRITE1(h);
364
+ write_index_header(idx, f);
365
+ write_index(idxp->q1.quantizer, f);
366
+ WRITE1(idxp->q1.nlist);
367
+ WRITE1(idxp->q1.quantizer_trains_alone);
368
+ write_ProductQuantizer(&idxp->pq, f);
369
+ WRITE1(idxp->code_size_1);
370
+ WRITE1(idxp->code_size_2);
371
+ WRITE1(idxp->code_size);
372
+ WRITEVECTOR(idxp->codes);
373
+ } else if (
374
+ const IndexScalarQuantizer* idxs =
375
+ dynamic_cast<const IndexScalarQuantizer*>(idx)) {
376
+ uint32_t h = fourcc("IxSQ");
377
+ WRITE1(h);
378
+ write_index_header(idx, f);
379
+ write_ScalarQuantizer(&idxs->sq, f);
380
+ WRITEVECTOR(idxs->codes);
381
+ } else if (
382
+ const IndexLattice* idxl = dynamic_cast<const IndexLattice*>(idx)) {
383
+ uint32_t h = fourcc("IxLa");
384
+ WRITE1(h);
385
+ WRITE1(idxl->d);
386
+ WRITE1(idxl->nsq);
387
+ WRITE1(idxl->scale_nbit);
388
+ WRITE1(idxl->zn_sphere_codec.r2);
389
+ write_index_header(idx, f);
390
+ WRITEVECTOR(idxl->trained);
391
+ } else if (
392
+ const IndexIVFFlatDedup* ivfl =
393
+ dynamic_cast<const IndexIVFFlatDedup*>(idx)) {
394
+ uint32_t h = fourcc("IwFd");
395
+ WRITE1(h);
396
+ write_ivf_header(ivfl, f);
326
397
  {
327
- std::vector<Index::idx_t> tab (2 * ivfl->instances.size());
398
+ std::vector<Index::idx_t> tab(2 * ivfl->instances.size());
328
399
  long i = 0;
329
- for (auto it = ivfl->instances.begin();
330
- it != ivfl->instances.end(); ++it) {
400
+ for (auto it = ivfl->instances.begin(); it != ivfl->instances.end();
401
+ ++it) {
331
402
  tab[i++] = it->first;
332
403
  tab[i++] = it->second;
333
404
  }
334
- WRITEVECTOR (tab);
405
+ WRITEVECTOR(tab);
335
406
  }
336
- write_InvertedLists (ivfl->invlists, f);
337
- } else if(const IndexIVFFlat * ivfl =
338
- dynamic_cast<const IndexIVFFlat *> (idx)) {
339
- uint32_t h = fourcc ("IwFl");
340
- WRITE1 (h);
341
- write_ivf_header (ivfl, f);
342
- write_InvertedLists (ivfl->invlists, f);
343
- } else if(const IndexIVFScalarQuantizer * ivsc =
344
- dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
345
- uint32_t h = fourcc ("IwSq");
346
- WRITE1 (h);
347
- write_ivf_header (ivsc, f);
348
- write_ScalarQuantizer (&ivsc->sq, f);
349
- WRITE1 (ivsc->code_size);
350
- WRITE1 (ivsc->by_residual);
351
- write_InvertedLists (ivsc->invlists, f);
352
- } else if(const IndexIVFSpectralHash *ivsp =
353
- dynamic_cast<const IndexIVFSpectralHash *>(idx)) {
354
- uint32_t h = fourcc ("IwSh");
355
- WRITE1 (h);
356
- write_ivf_header (ivsp, f);
357
- write_VectorTransform (ivsp->vt, f);
358
- WRITE1 (ivsp->nbit);
359
- WRITE1 (ivsp->period);
360
- WRITE1 (ivsp->threshold_type);
361
- WRITEVECTOR (ivsp->trained);
362
- write_InvertedLists (ivsp->invlists, f);
363
- } else if(const IndexIVFPQ * ivpq =
364
- dynamic_cast<const IndexIVFPQ *> (idx)) {
365
- const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
366
-
367
- uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ");
368
- WRITE1 (h);
369
- write_ivf_header (ivpq, f);
370
- WRITE1 (ivpq->by_residual);
371
- WRITE1 (ivpq->code_size);
372
- write_ProductQuantizer (&ivpq->pq, f);
373
- write_InvertedLists (ivpq->invlists, f);
407
+ write_InvertedLists(ivfl->invlists, f);
408
+ } else if (
409
+ const IndexIVFFlat* ivfl = dynamic_cast<const IndexIVFFlat*>(idx)) {
410
+ uint32_t h = fourcc("IwFl");
411
+ WRITE1(h);
412
+ write_ivf_header(ivfl, f);
413
+ write_InvertedLists(ivfl->invlists, f);
414
+ } else if (
415
+ const IndexIVFScalarQuantizer* ivsc =
416
+ dynamic_cast<const IndexIVFScalarQuantizer*>(idx)) {
417
+ uint32_t h = fourcc("IwSq");
418
+ WRITE1(h);
419
+ write_ivf_header(ivsc, f);
420
+ write_ScalarQuantizer(&ivsc->sq, f);
421
+ WRITE1(ivsc->code_size);
422
+ WRITE1(ivsc->by_residual);
423
+ write_InvertedLists(ivsc->invlists, f);
424
+ } else if (
425
+ const IndexIVFSpectralHash* ivsp =
426
+ dynamic_cast<const IndexIVFSpectralHash*>(idx)) {
427
+ uint32_t h = fourcc("IwSh");
428
+ WRITE1(h);
429
+ write_ivf_header(ivsp, f);
430
+ write_VectorTransform(ivsp->vt, f);
431
+ WRITE1(ivsp->nbit);
432
+ WRITE1(ivsp->period);
433
+ WRITE1(ivsp->threshold_type);
434
+ WRITEVECTOR(ivsp->trained);
435
+ write_InvertedLists(ivsp->invlists, f);
436
+ } else if (const IndexIVFPQ* ivpq = dynamic_cast<const IndexIVFPQ*>(idx)) {
437
+ const IndexIVFPQR* ivfpqr = dynamic_cast<const IndexIVFPQR*>(idx);
438
+
439
+ uint32_t h = fourcc(ivfpqr ? "IwQR" : "IwPQ");
440
+ WRITE1(h);
441
+ write_ivf_header(ivpq, f);
442
+ WRITE1(ivpq->by_residual);
443
+ WRITE1(ivpq->code_size);
444
+ write_ProductQuantizer(&ivpq->pq, f);
445
+ write_InvertedLists(ivpq->invlists, f);
374
446
  if (ivfpqr) {
375
- write_ProductQuantizer (&ivfpqr->refine_pq, f);
376
- WRITEVECTOR (ivfpqr->refine_codes);
377
- WRITE1 (ivfpqr->k_factor);
447
+ write_ProductQuantizer(&ivfpqr->refine_pq, f);
448
+ WRITEVECTOR(ivfpqr->refine_codes);
449
+ WRITE1(ivfpqr->k_factor);
378
450
  }
379
451
 
380
- } else if(const IndexPreTransform * ixpt =
381
- dynamic_cast<const IndexPreTransform *> (idx)) {
382
- uint32_t h = fourcc ("IxPT");
383
- WRITE1 (h);
384
- write_index_header (ixpt, f);
452
+ } else if (
453
+ const IndexPreTransform* ixpt =
454
+ dynamic_cast<const IndexPreTransform*>(idx)) {
455
+ uint32_t h = fourcc("IxPT");
456
+ WRITE1(h);
457
+ write_index_header(ixpt, f);
385
458
  int nt = ixpt->chain.size();
386
- WRITE1 (nt);
459
+ WRITE1(nt);
387
460
  for (int i = 0; i < nt; i++)
388
- write_VectorTransform (ixpt->chain[i], f);
389
- write_index (ixpt->index, f);
390
- } else if(const MultiIndexQuantizer * imiq =
391
- dynamic_cast<const MultiIndexQuantizer *> (idx)) {
392
- uint32_t h = fourcc ("Imiq");
393
- WRITE1 (h);
394
- write_index_header (imiq, f);
395
- write_ProductQuantizer (&imiq->pq, f);
396
- } else if(const IndexRefine * idxrf =
397
- dynamic_cast<const IndexRefine *> (idx)) {
398
- uint32_t h = fourcc ("IxRF");
399
- WRITE1 (h);
400
- write_index_header (idxrf, f);
401
- write_index (idxrf->base_index, f);
402
- write_index (idxrf->refine_index, f);
403
- WRITE1 (idxrf->k_factor);
404
- } else if(const IndexIDMap * idxmap =
405
- dynamic_cast<const IndexIDMap *> (idx)) {
406
- uint32_t h =
407
- dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
408
- fourcc ("IxMp");
461
+ write_VectorTransform(ixpt->chain[i], f);
462
+ write_index(ixpt->index, f);
463
+ } else if (
464
+ const MultiIndexQuantizer* imiq =
465
+ dynamic_cast<const MultiIndexQuantizer*>(idx)) {
466
+ uint32_t h = fourcc("Imiq");
467
+ WRITE1(h);
468
+ write_index_header(imiq, f);
469
+ write_ProductQuantizer(&imiq->pq, f);
470
+ } else if (
471
+ const IndexRefine* idxrf = dynamic_cast<const IndexRefine*>(idx)) {
472
+ uint32_t h = fourcc("IxRF");
473
+ WRITE1(h);
474
+ write_index_header(idxrf, f);
475
+ write_index(idxrf->base_index, f);
476
+ write_index(idxrf->refine_index, f);
477
+ WRITE1(idxrf->k_factor);
478
+ } else if (
479
+ const IndexIDMap* idxmap = dynamic_cast<const IndexIDMap*>(idx)) {
480
+ uint32_t h = dynamic_cast<const IndexIDMap2*>(idx) ? fourcc("IxM2")
481
+ : fourcc("IxMp");
409
482
  // no need to store additional info for IndexIDMap2
410
- WRITE1 (h);
411
- write_index_header (idxmap, f);
412
- write_index (idxmap->index, f);
413
- WRITEVECTOR (idxmap->id_map);
414
- } else if(const IndexHNSW * idxhnsw =
415
- dynamic_cast<const IndexHNSW *> (idx)) {
483
+ WRITE1(h);
484
+ write_index_header(idxmap, f);
485
+ write_index(idxmap->index, f);
486
+ WRITEVECTOR(idxmap->id_map);
487
+ } else if (const IndexHNSW* idxhnsw = dynamic_cast<const IndexHNSW*>(idx)) {
488
+ uint32_t h = dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf")
489
+ : dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp")
490
+ : dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs")
491
+ : dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2")
492
+ : 0;
493
+ FAISS_THROW_IF_NOT(h != 0);
494
+ WRITE1(h);
495
+ write_index_header(idxhnsw, f);
496
+ write_HNSW(&idxhnsw->hnsw, f);
497
+ write_index(idxhnsw->storage, f);
498
+ } else if (const IndexNSG* idxnsg = dynamic_cast<const IndexNSG*>(idx)) {
416
499
  uint32_t h =
417
- dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf") :
418
- dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp") :
419
- dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs") :
420
- dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2") :
421
- 0;
422
- FAISS_THROW_IF_NOT (h != 0);
423
- WRITE1 (h);
424
- write_index_header (idxhnsw, f);
425
- write_HNSW (&idxhnsw->hnsw, f);
426
- write_index (idxhnsw->storage, f);
427
- } else if (const IndexPQFastScan *idxpqfs =
428
- dynamic_cast<const IndexPQFastScan*>(idx)) {
500
+ dynamic_cast<const IndexNSGFlat*>(idx) ? fourcc("INSf") : 0;
501
+ FAISS_THROW_IF_NOT(h != 0);
502
+ WRITE1(h);
503
+ write_index_header(idxnsg, f);
504
+ WRITE1(idxnsg->GK);
505
+ WRITE1(idxnsg->build_type);
506
+ WRITE1(idxnsg->nndescent_S);
507
+ WRITE1(idxnsg->nndescent_R);
508
+ WRITE1(idxnsg->nndescent_L);
509
+ WRITE1(idxnsg->nndescent_iter);
510
+ write_NSG(&idxnsg->nsg, f);
511
+ write_index(idxnsg->storage, f);
512
+ } else if (
513
+ const IndexPQFastScan* idxpqfs =
514
+ dynamic_cast<const IndexPQFastScan*>(idx)) {
429
515
  uint32_t h = fourcc("IPfs");
430
- WRITE1 (h);
431
- write_index_header (idxpqfs, f);
432
- write_ProductQuantizer (&idxpqfs->pq, f);
433
- WRITE1 (idxpqfs->implem);
434
- WRITE1 (idxpqfs->bbs);
435
- WRITE1 (idxpqfs->qbs);
436
- WRITE1 (idxpqfs->ntotal2);
437
- WRITE1 (idxpqfs->M2);
438
- WRITEVECTOR (idxpqfs->codes);
439
- } else if (const IndexIVFPQFastScan * ivpq =
440
- dynamic_cast<const IndexIVFPQFastScan *> (idx)) {
441
- uint32_t h = fourcc ("IwPf");
442
- WRITE1 (h);
443
- write_ivf_header (ivpq, f);
444
- WRITE1 (ivpq->by_residual);
445
- WRITE1 (ivpq->code_size);
446
- WRITE1 (ivpq->bbs);
447
- WRITE1 (ivpq->M2);
448
- WRITE1 (ivpq->implem);
449
- WRITE1 (ivpq->qbs2);
450
- write_ProductQuantizer (&ivpq->pq, f);
451
- write_InvertedLists (ivpq->invlists, f);
516
+ WRITE1(h);
517
+ write_index_header(idxpqfs, f);
518
+ write_ProductQuantizer(&idxpqfs->pq, f);
519
+ WRITE1(idxpqfs->implem);
520
+ WRITE1(idxpqfs->bbs);
521
+ WRITE1(idxpqfs->qbs);
522
+ WRITE1(idxpqfs->ntotal2);
523
+ WRITE1(idxpqfs->M2);
524
+ WRITEVECTOR(idxpqfs->codes);
525
+ } else if (
526
+ const IndexIVFPQFastScan* ivpq =
527
+ dynamic_cast<const IndexIVFPQFastScan*>(idx)) {
528
+ uint32_t h = fourcc("IwPf");
529
+ WRITE1(h);
530
+ write_ivf_header(ivpq, f);
531
+ WRITE1(ivpq->by_residual);
532
+ WRITE1(ivpq->code_size);
533
+ WRITE1(ivpq->bbs);
534
+ WRITE1(ivpq->M2);
535
+ WRITE1(ivpq->implem);
536
+ WRITE1(ivpq->qbs2);
537
+ write_ProductQuantizer(&ivpq->pq, f);
538
+ write_InvertedLists(ivpq->invlists, f);
452
539
  } else {
453
- FAISS_THROW_MSG ("don't know how to serialize this type of index");
540
+ FAISS_THROW_MSG("don't know how to serialize this type of index");
454
541
  }
455
542
  }
456
543
 
457
- void write_index (const Index *idx, FILE *f) {
544
+ void write_index(const Index* idx, FILE* f) {
458
545
  FileIOWriter writer(f);
459
- write_index (idx, &writer);
546
+ write_index(idx, &writer);
460
547
  }
461
548
 
462
- void write_index (const Index *idx, const char *fname) {
549
+ void write_index(const Index* idx, const char* fname) {
463
550
  FileIOWriter writer(fname);
464
- write_index (idx, &writer);
551
+ write_index(idx, &writer);
465
552
  }
466
553
 
467
- void write_VectorTransform (const VectorTransform *vt, const char *fname) {
554
+ void write_VectorTransform(const VectorTransform* vt, const char* fname) {
468
555
  FileIOWriter writer(fname);
469
- write_VectorTransform (vt, &writer);
556
+ write_VectorTransform(vt, &writer);
470
557
  }
471
558
 
472
-
473
559
  /*************************************************************
474
560
  * Write binary indexes
475
561
  **************************************************************/
476
562
 
477
-
478
- static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) {
479
- WRITE1 (idx->d);
480
- WRITE1 (idx->code_size);
481
- WRITE1 (idx->ntotal);
482
- WRITE1 (idx->is_trained);
483
- WRITE1 (idx->metric_type);
563
+ static void write_index_binary_header(const IndexBinary* idx, IOWriter* f) {
564
+ WRITE1(idx->d);
565
+ WRITE1(idx->code_size);
566
+ WRITE1(idx->ntotal);
567
+ WRITE1(idx->is_trained);
568
+ WRITE1(idx->metric_type);
484
569
  }
485
570
 
486
- static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) {
487
- write_index_binary_header (ivf, f);
488
- WRITE1 (ivf->nlist);
489
- WRITE1 (ivf->nprobe);
490
- write_index_binary (ivf->quantizer, f);
491
- write_direct_map (&ivf->direct_map, f);
571
+ static void write_binary_ivf_header(const IndexBinaryIVF* ivf, IOWriter* f) {
572
+ write_index_binary_header(ivf, f);
573
+ WRITE1(ivf->nlist);
574
+ WRITE1(ivf->nprobe);
575
+ write_index_binary(ivf->quantizer, f);
576
+ write_direct_map(&ivf->direct_map, f);
492
577
  }
493
578
 
494
- static void write_binary_hash_invlists (
495
- const IndexBinaryHash::InvertedListMap &invlists,
496
- int b, IOWriter *f)
497
- {
579
+ static void write_binary_hash_invlists(
580
+ const IndexBinaryHash::InvertedListMap& invlists,
581
+ int b,
582
+ IOWriter* f) {
498
583
  size_t sz = invlists.size();
499
- WRITE1 (sz);
584
+ WRITE1(sz);
500
585
  size_t maxil = 0;
501
586
  for (auto it = invlists.begin(); it != invlists.end(); ++it) {
502
- if(it->second.ids.size() > maxil) {
587
+ if (it->second.ids.size() > maxil) {
503
588
  maxil = it->second.ids.size();
504
589
  }
505
590
  }
506
591
  int il_nbit = 0;
507
- while(maxil >= ((uint64_t)1 << il_nbit)) {
592
+ while (maxil >= ((uint64_t)1 << il_nbit)) {
508
593
  il_nbit++;
509
594
  }
510
595
  WRITE1(il_nbit);
@@ -513,25 +598,25 @@ static void write_binary_hash_invlists (
513
598
  // memmap it at some point
514
599
 
515
600
  // buffer for bitstrings
516
- std::vector<uint8_t> buf (((b + il_nbit) * sz + 7) / 8);
517
- BitstringWriter wr (buf.data(), buf.size());
601
+ std::vector<uint8_t> buf(((b + il_nbit) * sz + 7) / 8);
602
+ BitstringWriter wr(buf.data(), buf.size());
518
603
  for (auto it = invlists.begin(); it != invlists.end(); ++it) {
519
- wr.write (it->first, b);
520
- wr.write (it->second.ids.size(), il_nbit);
604
+ wr.write(it->first, b);
605
+ wr.write(it->second.ids.size(), il_nbit);
521
606
  }
522
- WRITEVECTOR (buf);
607
+ WRITEVECTOR(buf);
523
608
 
524
609
  for (auto it = invlists.begin(); it != invlists.end(); ++it) {
525
- WRITEVECTOR (it->second.ids);
526
- WRITEVECTOR (it->second.vecs);
610
+ WRITEVECTOR(it->second.ids);
611
+ WRITEVECTOR(it->second.vecs);
527
612
  }
528
613
  }
529
614
 
530
615
  static void write_binary_multi_hash_map(
531
- const IndexBinaryMultiHash::Map &map,
532
- int b, size_t ntotal,
533
- IOWriter *f)
534
- {
616
+ const IndexBinaryMultiHash::Map& map,
617
+ int b,
618
+ size_t ntotal,
619
+ IOWriter* f) {
535
620
  int id_bits = 0;
536
621
  while ((ntotal > ((Index::idx_t)1 << id_bits))) {
537
622
  id_bits++;
@@ -541,7 +626,7 @@ static void write_binary_multi_hash_map(
541
626
  WRITE1(sz);
542
627
  size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
543
628
  std::vector<uint8_t> buf((nbit + 7) / 8);
544
- BitstringWriter wr (buf.data(), buf.size());
629
+ BitstringWriter wr(buf.data(), buf.size());
545
630
  for (auto it = map.begin(); it != map.end(); ++it) {
546
631
  wr.write(it->first, b);
547
632
  wr.write(it->second.size(), id_bits);
@@ -549,80 +634,85 @@ static void write_binary_multi_hash_map(
549
634
  wr.write(id, id_bits);
550
635
  }
551
636
  }
552
- WRITEVECTOR (buf);
637
+ WRITEVECTOR(buf);
553
638
  }
554
639
 
555
- void write_index_binary (const IndexBinary *idx, IOWriter *f) {
556
- if (const IndexBinaryFlat *idxf =
557
- dynamic_cast<const IndexBinaryFlat *> (idx)) {
558
- uint32_t h = fourcc ("IBxF");
559
- WRITE1 (h);
560
- write_index_binary_header (idx, f);
561
- WRITEVECTOR (idxf->xb);
562
- } else if (const IndexBinaryIVF *ivf =
563
- dynamic_cast<const IndexBinaryIVF *> (idx)) {
564
- uint32_t h = fourcc ("IBwF");
565
- WRITE1 (h);
566
- write_binary_ivf_header (ivf, f);
567
- write_InvertedLists (ivf->invlists, f);
568
- } else if(const IndexBinaryFromFloat * idxff =
569
- dynamic_cast<const IndexBinaryFromFloat *> (idx)) {
570
- uint32_t h = fourcc ("IBFf");
571
- WRITE1 (h);
572
- write_index_binary_header (idxff, f);
573
- write_index (idxff->index, f);
574
- } else if (const IndexBinaryHNSW *idxhnsw =
575
- dynamic_cast<const IndexBinaryHNSW *> (idx)) {
576
- uint32_t h = fourcc ("IBHf");
577
- WRITE1 (h);
578
- write_index_binary_header (idxhnsw, f);
579
- write_HNSW (&idxhnsw->hnsw, f);
580
- write_index_binary (idxhnsw->storage, f);
581
- } else if(const IndexBinaryIDMap * idxmap =
582
- dynamic_cast<const IndexBinaryIDMap *> (idx)) {
583
- uint32_t h =
584
- dynamic_cast<const IndexBinaryIDMap2 *> (idx) ? fourcc ("IBM2") :
585
- fourcc ("IBMp");
640
+ void write_index_binary(const IndexBinary* idx, IOWriter* f) {
641
+ if (const IndexBinaryFlat* idxf =
642
+ dynamic_cast<const IndexBinaryFlat*>(idx)) {
643
+ uint32_t h = fourcc("IBxF");
644
+ WRITE1(h);
645
+ write_index_binary_header(idx, f);
646
+ WRITEVECTOR(idxf->xb);
647
+ } else if (
648
+ const IndexBinaryIVF* ivf =
649
+ dynamic_cast<const IndexBinaryIVF*>(idx)) {
650
+ uint32_t h = fourcc("IBwF");
651
+ WRITE1(h);
652
+ write_binary_ivf_header(ivf, f);
653
+ write_InvertedLists(ivf->invlists, f);
654
+ } else if (
655
+ const IndexBinaryFromFloat* idxff =
656
+ dynamic_cast<const IndexBinaryFromFloat*>(idx)) {
657
+ uint32_t h = fourcc("IBFf");
658
+ WRITE1(h);
659
+ write_index_binary_header(idxff, f);
660
+ write_index(idxff->index, f);
661
+ } else if (
662
+ const IndexBinaryHNSW* idxhnsw =
663
+ dynamic_cast<const IndexBinaryHNSW*>(idx)) {
664
+ uint32_t h = fourcc("IBHf");
665
+ WRITE1(h);
666
+ write_index_binary_header(idxhnsw, f);
667
+ write_HNSW(&idxhnsw->hnsw, f);
668
+ write_index_binary(idxhnsw->storage, f);
669
+ } else if (
670
+ const IndexBinaryIDMap* idxmap =
671
+ dynamic_cast<const IndexBinaryIDMap*>(idx)) {
672
+ uint32_t h = dynamic_cast<const IndexBinaryIDMap2*>(idx)
673
+ ? fourcc("IBM2")
674
+ : fourcc("IBMp");
586
675
  // no need to store additional info for IndexIDMap2
587
- WRITE1 (h);
588
- write_index_binary_header (idxmap, f);
589
- write_index_binary (idxmap->index, f);
590
- WRITEVECTOR (idxmap->id_map);
591
- } else if (const IndexBinaryHash *idxh =
592
- dynamic_cast<const IndexBinaryHash *> (idx)) {
593
- uint32_t h = fourcc ("IBHh");
594
- WRITE1 (h);
595
- write_index_binary_header (idxh, f);
596
- WRITE1 (idxh->b);
597
- WRITE1 (idxh->nflip);
676
+ WRITE1(h);
677
+ write_index_binary_header(idxmap, f);
678
+ write_index_binary(idxmap->index, f);
679
+ WRITEVECTOR(idxmap->id_map);
680
+ } else if (
681
+ const IndexBinaryHash* idxh =
682
+ dynamic_cast<const IndexBinaryHash*>(idx)) {
683
+ uint32_t h = fourcc("IBHh");
684
+ WRITE1(h);
685
+ write_index_binary_header(idxh, f);
686
+ WRITE1(idxh->b);
687
+ WRITE1(idxh->nflip);
598
688
  write_binary_hash_invlists(idxh->invlists, idxh->b, f);
599
- } else if (const IndexBinaryMultiHash *idxmh =
600
- dynamic_cast<const IndexBinaryMultiHash *> (idx)) {
601
- uint32_t h = fourcc ("IBHm");
602
- WRITE1 (h);
603
- write_index_binary_header (idxmh, f);
604
- write_index_binary (idxmh->storage, f);
605
- WRITE1 (idxmh->b);
606
- WRITE1 (idxmh->nhash);
607
- WRITE1 (idxmh->nflip);
689
+ } else if (
690
+ const IndexBinaryMultiHash* idxmh =
691
+ dynamic_cast<const IndexBinaryMultiHash*>(idx)) {
692
+ uint32_t h = fourcc("IBHm");
693
+ WRITE1(h);
694
+ write_index_binary_header(idxmh, f);
695
+ write_index_binary(idxmh->storage, f);
696
+ WRITE1(idxmh->b);
697
+ WRITE1(idxmh->nhash);
698
+ WRITE1(idxmh->nflip);
608
699
  for (int i = 0; i < idxmh->nhash; i++) {
609
700
  write_binary_multi_hash_map(
610
701
  idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
611
702
  }
612
703
  } else {
613
- FAISS_THROW_MSG ("don't know how to serialize this type of index");
704
+ FAISS_THROW_MSG("don't know how to serialize this type of index");
614
705
  }
615
706
  }
616
707
 
617
- void write_index_binary (const IndexBinary *idx, FILE *f) {
708
+ void write_index_binary(const IndexBinary* idx, FILE* f) {
618
709
  FileIOWriter writer(f);
619
710
  write_index_binary(idx, &writer);
620
711
  }
621
712
 
622
- void write_index_binary (const IndexBinary *idx, const char *fname) {
713
+ void write_index_binary(const IndexBinary* idx, const char* fname) {
623
714
  FileIOWriter writer(fname);
624
- write_index_binary (idx, &writer);
715
+ write_index_binary(idx, &writer);
625
716
  }
626
717
 
627
-
628
718
  } // namespace faiss