faiss 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +103 -3
  4. data/ext/faiss/ext.cpp +99 -32
  5. data/ext/faiss/extconf.rb +12 -2
  6. data/lib/faiss/ext.bundle +0 -0
  7. data/lib/faiss/index.rb +3 -3
  8. data/lib/faiss/index_binary.rb +3 -3
  9. data/lib/faiss/kmeans.rb +1 -1
  10. data/lib/faiss/pca_matrix.rb +2 -2
  11. data/lib/faiss/product_quantizer.rb +3 -3
  12. data/lib/faiss/version.rb +1 -1
  13. data/vendor/faiss/AutoTune.cpp +719 -0
  14. data/vendor/faiss/AutoTune.h +212 -0
  15. data/vendor/faiss/Clustering.cpp +261 -0
  16. data/vendor/faiss/Clustering.h +101 -0
  17. data/vendor/faiss/IVFlib.cpp +339 -0
  18. data/vendor/faiss/IVFlib.h +132 -0
  19. data/vendor/faiss/Index.cpp +171 -0
  20. data/vendor/faiss/Index.h +261 -0
  21. data/vendor/faiss/Index2Layer.cpp +437 -0
  22. data/vendor/faiss/Index2Layer.h +85 -0
  23. data/vendor/faiss/IndexBinary.cpp +77 -0
  24. data/vendor/faiss/IndexBinary.h +163 -0
  25. data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
  26. data/vendor/faiss/IndexBinaryFlat.h +54 -0
  27. data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
  28. data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
  29. data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
  30. data/vendor/faiss/IndexBinaryHNSW.h +56 -0
  31. data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
  32. data/vendor/faiss/IndexBinaryIVF.h +211 -0
  33. data/vendor/faiss/IndexFlat.cpp +508 -0
  34. data/vendor/faiss/IndexFlat.h +175 -0
  35. data/vendor/faiss/IndexHNSW.cpp +1090 -0
  36. data/vendor/faiss/IndexHNSW.h +170 -0
  37. data/vendor/faiss/IndexIVF.cpp +909 -0
  38. data/vendor/faiss/IndexIVF.h +353 -0
  39. data/vendor/faiss/IndexIVFFlat.cpp +502 -0
  40. data/vendor/faiss/IndexIVFFlat.h +118 -0
  41. data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
  42. data/vendor/faiss/IndexIVFPQ.h +161 -0
  43. data/vendor/faiss/IndexIVFPQR.cpp +219 -0
  44. data/vendor/faiss/IndexIVFPQR.h +65 -0
  45. data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
  46. data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
  47. data/vendor/faiss/IndexLSH.cpp +225 -0
  48. data/vendor/faiss/IndexLSH.h +87 -0
  49. data/vendor/faiss/IndexLattice.cpp +143 -0
  50. data/vendor/faiss/IndexLattice.h +68 -0
  51. data/vendor/faiss/IndexPQ.cpp +1188 -0
  52. data/vendor/faiss/IndexPQ.h +199 -0
  53. data/vendor/faiss/IndexPreTransform.cpp +288 -0
  54. data/vendor/faiss/IndexPreTransform.h +91 -0
  55. data/vendor/faiss/IndexReplicas.cpp +123 -0
  56. data/vendor/faiss/IndexReplicas.h +76 -0
  57. data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
  58. data/vendor/faiss/IndexScalarQuantizer.h +127 -0
  59. data/vendor/faiss/IndexShards.cpp +317 -0
  60. data/vendor/faiss/IndexShards.h +100 -0
  61. data/vendor/faiss/InvertedLists.cpp +623 -0
  62. data/vendor/faiss/InvertedLists.h +334 -0
  63. data/vendor/faiss/LICENSE +21 -0
  64. data/vendor/faiss/MatrixStats.cpp +252 -0
  65. data/vendor/faiss/MatrixStats.h +62 -0
  66. data/vendor/faiss/MetaIndexes.cpp +351 -0
  67. data/vendor/faiss/MetaIndexes.h +126 -0
  68. data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
  69. data/vendor/faiss/OnDiskInvertedLists.h +127 -0
  70. data/vendor/faiss/VectorTransform.cpp +1157 -0
  71. data/vendor/faiss/VectorTransform.h +322 -0
  72. data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
  73. data/vendor/faiss/c_api/AutoTune_c.h +64 -0
  74. data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
  75. data/vendor/faiss/c_api/Clustering_c.h +117 -0
  76. data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
  77. data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
  78. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
  79. data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
  80. data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
  81. data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
  82. data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
  83. data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
  84. data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
  85. data/vendor/faiss/c_api/IndexShards_c.h +42 -0
  86. data/vendor/faiss/c_api/Index_c.cpp +105 -0
  87. data/vendor/faiss/c_api/Index_c.h +183 -0
  88. data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
  89. data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
  90. data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
  91. data/vendor/faiss/c_api/clone_index_c.h +32 -0
  92. data/vendor/faiss/c_api/error_c.h +42 -0
  93. data/vendor/faiss/c_api/error_impl.cpp +27 -0
  94. data/vendor/faiss/c_api/error_impl.h +16 -0
  95. data/vendor/faiss/c_api/faiss_c.h +58 -0
  96. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
  97. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
  98. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
  99. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
  100. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
  101. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
  102. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
  103. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
  104. data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
  105. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
  106. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
  107. data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
  108. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
  109. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
  110. data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
  111. data/vendor/faiss/c_api/index_factory_c.h +30 -0
  112. data/vendor/faiss/c_api/index_io_c.cpp +42 -0
  113. data/vendor/faiss/c_api/index_io_c.h +50 -0
  114. data/vendor/faiss/c_api/macros_impl.h +110 -0
  115. data/vendor/faiss/clone_index.cpp +147 -0
  116. data/vendor/faiss/clone_index.h +38 -0
  117. data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
  118. data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
  119. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
  120. data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
  121. data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
  122. data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
  123. data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
  124. data/vendor/faiss/gpu/GpuCloner.h +82 -0
  125. data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
  126. data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
  127. data/vendor/faiss/gpu/GpuDistance.h +52 -0
  128. data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
  129. data/vendor/faiss/gpu/GpuIndex.h +148 -0
  130. data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
  131. data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
  132. data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
  133. data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
  134. data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
  135. data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
  136. data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
  137. data/vendor/faiss/gpu/GpuResources.cpp +52 -0
  138. data/vendor/faiss/gpu/GpuResources.h +73 -0
  139. data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
  140. data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
  141. data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
  142. data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
  143. data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
  144. data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
  145. data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
  146. data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
  147. data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
  148. data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
  149. data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
  150. data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
  151. data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
  152. data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
  153. data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
  154. data/vendor/faiss/gpu/test/TestUtils.h +93 -0
  155. data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
  156. data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
  157. data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
  158. data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
  159. data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
  160. data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
  161. data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
  162. data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
  163. data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
  164. data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
  165. data/vendor/faiss/gpu/utils/Timer.h +52 -0
  166. data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
  167. data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
  168. data/vendor/faiss/impl/FaissAssert.h +95 -0
  169. data/vendor/faiss/impl/FaissException.cpp +66 -0
  170. data/vendor/faiss/impl/FaissException.h +71 -0
  171. data/vendor/faiss/impl/HNSW.cpp +818 -0
  172. data/vendor/faiss/impl/HNSW.h +275 -0
  173. data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
  174. data/vendor/faiss/impl/PolysemousTraining.h +158 -0
  175. data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
  176. data/vendor/faiss/impl/ProductQuantizer.h +242 -0
  177. data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
  178. data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
  179. data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
  180. data/vendor/faiss/impl/ThreadedIndex.h +80 -0
  181. data/vendor/faiss/impl/index_read.cpp +793 -0
  182. data/vendor/faiss/impl/index_write.cpp +558 -0
  183. data/vendor/faiss/impl/io.cpp +142 -0
  184. data/vendor/faiss/impl/io.h +98 -0
  185. data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
  186. data/vendor/faiss/impl/lattice_Zn.h +199 -0
  187. data/vendor/faiss/index_factory.cpp +392 -0
  188. data/vendor/faiss/index_factory.h +25 -0
  189. data/vendor/faiss/index_io.h +75 -0
  190. data/vendor/faiss/misc/test_blas.cpp +84 -0
  191. data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
  192. data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
  193. data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
  194. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
  195. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
  196. data/vendor/faiss/tests/test_merge.cpp +258 -0
  197. data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
  198. data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
  199. data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
  200. data/vendor/faiss/tests/test_params_override.cpp +231 -0
  201. data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
  202. data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
  203. data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
  204. data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
  205. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
  206. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
  207. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
  208. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
  209. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
  210. data/vendor/faiss/utils/Heap.cpp +122 -0
  211. data/vendor/faiss/utils/Heap.h +495 -0
  212. data/vendor/faiss/utils/WorkerThread.cpp +126 -0
  213. data/vendor/faiss/utils/WorkerThread.h +61 -0
  214. data/vendor/faiss/utils/distances.cpp +765 -0
  215. data/vendor/faiss/utils/distances.h +243 -0
  216. data/vendor/faiss/utils/distances_simd.cpp +809 -0
  217. data/vendor/faiss/utils/extra_distances.cpp +336 -0
  218. data/vendor/faiss/utils/extra_distances.h +54 -0
  219. data/vendor/faiss/utils/hamming-inl.h +472 -0
  220. data/vendor/faiss/utils/hamming.cpp +792 -0
  221. data/vendor/faiss/utils/hamming.h +220 -0
  222. data/vendor/faiss/utils/random.cpp +192 -0
  223. data/vendor/faiss/utils/random.h +60 -0
  224. data/vendor/faiss/utils/utils.cpp +783 -0
  225. data/vendor/faiss/utils/utils.h +181 -0
  226. metadata +216 -2
@@ -0,0 +1,558 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #include <faiss/index_io.h>
11
+
12
+ #include <cstdio>
13
+ #include <cstdlib>
14
+
15
+ #include <sys/mman.h>
16
+ #include <sys/types.h>
17
+ #include <sys/stat.h>
18
+ #include <unistd.h>
19
+
20
+ #include <faiss/impl/FaissAssert.h>
21
+ #include <faiss/impl/io.h>
22
+
23
+ #include <faiss/IndexFlat.h>
24
+ #include <faiss/VectorTransform.h>
25
+ #include <faiss/IndexPreTransform.h>
26
+ #include <faiss/IndexLSH.h>
27
+ #include <faiss/IndexPQ.h>
28
+ #include <faiss/IndexIVF.h>
29
+ #include <faiss/IndexIVFPQ.h>
30
+ #include <faiss/IndexIVFPQR.h>
31
+ #include <faiss/Index2Layer.h>
32
+ #include <faiss/IndexIVFFlat.h>
33
+ #include <faiss/IndexIVFSpectralHash.h>
34
+ #include <faiss/MetaIndexes.h>
35
+ #include <faiss/IndexScalarQuantizer.h>
36
+ #include <faiss/IndexHNSW.h>
37
+ #include <faiss/IndexLattice.h>
38
+
39
+ #include <faiss/OnDiskInvertedLists.h>
40
+ #include <faiss/IndexBinaryFlat.h>
41
+ #include <faiss/IndexBinaryFromFloat.h>
42
+ #include <faiss/IndexBinaryHNSW.h>
43
+ #include <faiss/IndexBinaryIVF.h>
44
+
45
+
46
+
47
+ /*************************************************************
48
+ * The I/O format is the content of the class. For objects that are
49
+ * inherited, like Index, a 4-character-code (fourcc) indicates which
50
+ * child class this is an instance of.
51
+ *
52
+ * In this case, the fields of the parent class are written first,
53
+ * then the ones for the child classes. Note that this requires
54
+ * classes to be serialized to have a constructor without parameters,
55
+ * so that the fields can be filled in later. The default constructor
56
+ * should set reasonable defaults for all fields.
57
+ *
58
+ * The fourccs are assigned arbitrarily. When the class changed (added
59
+ * or deprecated fields), the fourcc can be replaced. New code should
60
+ * be able to read the old fourcc and fill in new classes.
61
+ *
62
+ * TODO: serialization to strings for use in Python pickle or Torch
63
+ * serialization.
64
+ *
65
+ * TODO: in this file, the read functions that encounter errors may
66
+ * leak memory.
67
+ **************************************************************/
68
+
69
+
70
+
71
+ namespace faiss {
72
+
73
+
74
+ /*************************************************************
75
+ * I/O macros
76
+ *
77
+ * we use macros so that we have a line number to report in abort().
78
+ * This makes debugging a lot easier. The IOReader or IOWriter is
79
+ * always called f and thus is not passed in as a macro parameter.
80
+ **************************************************************/
81
+
82
+
// Write `n` items of type *ptr through the writer `f` that must be in scope
// at the expansion site, and throw (via FAISS_THROW_IF_NOT_FMT) when the
// writer reports a different item count. Both counts are size_t, hence %zu.
// The body is wrapped in do { } while (0) so that the macro behaves like a
// single statement, including inside an unbraced if / else.
#define WRITEANDCHECK(ptr, n) do {                                  \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);                  \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                          \
            "write error in %s: %zu != %zu (%s)",                   \
            f->name.c_str(), ret, size_t(n), strerror(errno));      \
    } while (0)

// Write one object; `x` must be an lvalue because its address is taken.
#define WRITE1(x) WRITEANDCHECK(&(x), 1)

// Write a vector-like container: first its element count as a size_t,
// then the elements themselves from (vec).data().
#define WRITEVECTOR(vec) do {                                       \
        size_t size = (vec).size ();                                \
        WRITEANDCHECK (&size, 1);                                   \
        WRITEANDCHECK ((vec).data (), size);                        \
    } while (0)
97
+
98
+
99
+
100
+ /*************************************************************
101
+ * Write
102
+ **************************************************************/
103
+ static void write_index_header (const Index *idx, IOWriter *f) {
104
+ WRITE1 (idx->d);
105
+ WRITE1 (idx->ntotal);
106
+ Index::idx_t dummy = 1 << 20;
107
+ WRITE1 (dummy);
108
+ WRITE1 (dummy);
109
+ WRITE1 (idx->is_trained);
110
+ WRITE1 (idx->metric_type);
111
+ if (idx->metric_type > 1) {
112
+ WRITE1 (idx->metric_arg);
113
+ }
114
+ }
115
+
116
+ void write_VectorTransform (const VectorTransform *vt, IOWriter *f) {
117
+ if (const LinearTransform * lt =
118
+ dynamic_cast < const LinearTransform *> (vt)) {
119
+ if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
120
+ uint32_t h = fourcc ("rrot");
121
+ WRITE1 (h);
122
+ } else if (const PCAMatrix * pca =
123
+ dynamic_cast<const PCAMatrix *>(lt)) {
124
+ uint32_t h = fourcc ("PcAm");
125
+ WRITE1 (h);
126
+ WRITE1 (pca->eigen_power);
127
+ WRITE1 (pca->random_rotation);
128
+ WRITE1 (pca->balanced_bins);
129
+ WRITEVECTOR (pca->mean);
130
+ WRITEVECTOR (pca->eigenvalues);
131
+ WRITEVECTOR (pca->PCAMat);
132
+ } else if (const ITQMatrix * itqm =
133
+ dynamic_cast<const ITQMatrix *>(lt)) {
134
+ uint32_t h = fourcc ("Viqm");
135
+ WRITE1 (h);
136
+ WRITE1 (itqm->max_iter);
137
+ WRITE1 (itqm->seed);
138
+ } else {
139
+ // generic LinearTransform (includes OPQ)
140
+ uint32_t h = fourcc ("LTra");
141
+ WRITE1 (h);
142
+ }
143
+ WRITE1 (lt->have_bias);
144
+ WRITEVECTOR (lt->A);
145
+ WRITEVECTOR (lt->b);
146
+ } else if (const RemapDimensionsTransform *rdt =
147
+ dynamic_cast<const RemapDimensionsTransform *>(vt)) {
148
+ uint32_t h = fourcc ("RmDT");
149
+ WRITE1 (h);
150
+ WRITEVECTOR (rdt->map);
151
+ } else if (const NormalizationTransform *nt =
152
+ dynamic_cast<const NormalizationTransform *>(vt)) {
153
+ uint32_t h = fourcc ("VNrm");
154
+ WRITE1 (h);
155
+ WRITE1 (nt->norm);
156
+ } else if (const CenteringTransform *ct =
157
+ dynamic_cast<const CenteringTransform *>(vt)) {
158
+ uint32_t h = fourcc ("VCnt");
159
+ WRITE1 (h);
160
+ WRITEVECTOR (ct->mean);
161
+ } else if (const ITQTransform *itqt =
162
+ dynamic_cast<const ITQTransform*> (vt)) {
163
+ uint32_t h = fourcc ("Viqt");
164
+ WRITE1 (h);
165
+ WRITEVECTOR (itqt->mean);
166
+ WRITE1 (itqt->do_pca);
167
+ write_VectorTransform (&itqt->itq, f);
168
+ write_VectorTransform (&itqt->pca_then_itq, f);
169
+ } else {
170
+ FAISS_THROW_MSG ("cannot serialize this");
171
+ }
172
+ // common fields
173
+ WRITE1 (vt->d_in);
174
+ WRITE1 (vt->d_out);
175
+ WRITE1 (vt->is_trained);
176
+ }
177
+
178
+ void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) {
179
+ WRITE1 (pq->d);
180
+ WRITE1 (pq->M);
181
+ WRITE1 (pq->nbits);
182
+ WRITEVECTOR (pq->centroids);
183
+ }
184
+
185
+ static void write_ScalarQuantizer (
186
+ const ScalarQuantizer *ivsc, IOWriter *f) {
187
+ WRITE1 (ivsc->qtype);
188
+ WRITE1 (ivsc->rangestat);
189
+ WRITE1 (ivsc->rangestat_arg);
190
+ WRITE1 (ivsc->d);
191
+ WRITE1 (ivsc->code_size);
192
+ WRITEVECTOR (ivsc->trained);
193
+ }
194
+
195
+ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
196
+ if (ils == nullptr) {
197
+ uint32_t h = fourcc ("il00");
198
+ WRITE1 (h);
199
+ } else if (const auto & ails =
200
+ dynamic_cast<const ArrayInvertedLists *>(ils)) {
201
+ uint32_t h = fourcc ("ilar");
202
+ WRITE1 (h);
203
+ WRITE1 (ails->nlist);
204
+ WRITE1 (ails->code_size);
205
+ // here we store either as a full or a sparse data buffer
206
+ size_t n_non0 = 0;
207
+ for (size_t i = 0; i < ails->nlist; i++) {
208
+ if (ails->ids[i].size() > 0)
209
+ n_non0++;
210
+ }
211
+ if (n_non0 > ails->nlist / 2) {
212
+ uint32_t list_type = fourcc("full");
213
+ WRITE1 (list_type);
214
+ std::vector<size_t> sizes;
215
+ for (size_t i = 0; i < ails->nlist; i++) {
216
+ sizes.push_back (ails->ids[i].size());
217
+ }
218
+ WRITEVECTOR (sizes);
219
+ } else {
220
+ int list_type = fourcc("sprs"); // sparse
221
+ WRITE1 (list_type);
222
+ std::vector<size_t> sizes;
223
+ for (size_t i = 0; i < ails->nlist; i++) {
224
+ size_t n = ails->ids[i].size();
225
+ if (n > 0) {
226
+ sizes.push_back (i);
227
+ sizes.push_back (n);
228
+ }
229
+ }
230
+ WRITEVECTOR (sizes);
231
+ }
232
+ // make a single contiguous data buffer (useful for mmapping)
233
+ for (size_t i = 0; i < ails->nlist; i++) {
234
+ size_t n = ails->ids[i].size();
235
+ if (n > 0) {
236
+ WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size);
237
+ WRITEANDCHECK (ails->ids[i].data(), n);
238
+ }
239
+ }
240
+ } else if (const auto & od =
241
+ dynamic_cast<const OnDiskInvertedLists *>(ils)) {
242
+ uint32_t h = fourcc ("ilod");
243
+ WRITE1 (h);
244
+ WRITE1 (ils->nlist);
245
+ WRITE1 (ils->code_size);
246
+ // this is a POD object
247
+ WRITEVECTOR (od->lists);
248
+
249
+ {
250
+ std::vector<OnDiskInvertedLists::Slot> v(
251
+ od->slots.begin(), od->slots.end());
252
+ WRITEVECTOR(v);
253
+ }
254
+ {
255
+ std::vector<char> x(od->filename.begin(), od->filename.end());
256
+ WRITEVECTOR(x);
257
+ }
258
+ WRITE1(od->totsize);
259
+
260
+ } else {
261
+ fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
262
+ "saving null invlist\n");
263
+ uint32_t h = fourcc ("il00");
264
+ WRITE1 (h);
265
+ }
266
+ }
267
+
268
+
269
+ void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
270
+ FileIOWriter writer(fname);
271
+ write_ProductQuantizer (pq, &writer);
272
+ }
273
+
274
+ static void write_HNSW (const HNSW *hnsw, IOWriter *f) {
275
+
276
+ WRITEVECTOR (hnsw->assign_probas);
277
+ WRITEVECTOR (hnsw->cum_nneighbor_per_level);
278
+ WRITEVECTOR (hnsw->levels);
279
+ WRITEVECTOR (hnsw->offsets);
280
+ WRITEVECTOR (hnsw->neighbors);
281
+
282
+ WRITE1 (hnsw->entry_point);
283
+ WRITE1 (hnsw->max_level);
284
+ WRITE1 (hnsw->efConstruction);
285
+ WRITE1 (hnsw->efSearch);
286
+ WRITE1 (hnsw->upper_beam);
287
+ }
288
+
289
+ static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) {
290
+ write_index_header (ivf, f);
291
+ WRITE1 (ivf->nlist);
292
+ WRITE1 (ivf->nprobe);
293
+ write_index (ivf->quantizer, f);
294
+ WRITE1 (ivf->maintain_direct_map);
295
+ WRITEVECTOR (ivf->direct_map);
296
+ }
297
+
298
+ void write_index (const Index *idx, IOWriter *f) {
299
+ if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
300
+ uint32_t h = fourcc (
301
+ idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
302
+ idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
303
+ WRITE1 (h);
304
+ write_index_header (idx, f);
305
+ WRITEVECTOR (idxf->xb);
306
+ } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
307
+ uint32_t h = fourcc ("IxHe");
308
+ WRITE1 (h);
309
+ write_index_header (idx, f);
310
+ WRITE1 (idxl->nbits);
311
+ WRITE1 (idxl->rotate_data);
312
+ WRITE1 (idxl->train_thresholds);
313
+ WRITEVECTOR (idxl->thresholds);
314
+ WRITE1 (idxl->bytes_per_vec);
315
+ write_VectorTransform (&idxl->rrot, f);
316
+ WRITEVECTOR (idxl->codes);
317
+ } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
318
+ uint32_t h = fourcc ("IxPq");
319
+ WRITE1 (h);
320
+ write_index_header (idx, f);
321
+ write_ProductQuantizer (&idxp->pq, f);
322
+ WRITEVECTOR (idxp->codes);
323
+ // search params -- maybe not useful to store?
324
+ WRITE1 (idxp->search_type);
325
+ WRITE1 (idxp->encode_signs);
326
+ WRITE1 (idxp->polysemous_ht);
327
+ } else if(const Index2Layer * idxp =
328
+ dynamic_cast<const Index2Layer *> (idx)) {
329
+ uint32_t h = fourcc ("Ix2L");
330
+ WRITE1 (h);
331
+ write_index_header (idx, f);
332
+ write_index (idxp->q1.quantizer, f);
333
+ WRITE1 (idxp->q1.nlist);
334
+ WRITE1 (idxp->q1.quantizer_trains_alone);
335
+ write_ProductQuantizer (&idxp->pq, f);
336
+ WRITE1 (idxp->code_size_1);
337
+ WRITE1 (idxp->code_size_2);
338
+ WRITE1 (idxp->code_size);
339
+ WRITEVECTOR (idxp->codes);
340
+ } else if(const IndexScalarQuantizer * idxs =
341
+ dynamic_cast<const IndexScalarQuantizer *> (idx)) {
342
+ uint32_t h = fourcc ("IxSQ");
343
+ WRITE1 (h);
344
+ write_index_header (idx, f);
345
+ write_ScalarQuantizer (&idxs->sq, f);
346
+ WRITEVECTOR (idxs->codes);
347
+ } else if(const IndexLattice * idxl =
348
+ dynamic_cast<const IndexLattice *> (idx)) {
349
+ uint32_t h = fourcc ("IxLa");
350
+ WRITE1 (h);
351
+ WRITE1 (idxl->d);
352
+ WRITE1 (idxl->nsq);
353
+ WRITE1 (idxl->scale_nbit);
354
+ WRITE1 (idxl->zn_sphere_codec.r2);
355
+ write_index_header (idx, f);
356
+ WRITEVECTOR (idxl->trained);
357
+ } else if(const IndexIVFFlatDedup * ivfl =
358
+ dynamic_cast<const IndexIVFFlatDedup *> (idx)) {
359
+ uint32_t h = fourcc ("IwFd");
360
+ WRITE1 (h);
361
+ write_ivf_header (ivfl, f);
362
+ {
363
+ std::vector<Index::idx_t> tab (2 * ivfl->instances.size());
364
+ long i = 0;
365
+ for (auto it = ivfl->instances.begin();
366
+ it != ivfl->instances.end(); ++it) {
367
+ tab[i++] = it->first;
368
+ tab[i++] = it->second;
369
+ }
370
+ WRITEVECTOR (tab);
371
+ }
372
+ write_InvertedLists (ivfl->invlists, f);
373
+ } else if(const IndexIVFFlat * ivfl =
374
+ dynamic_cast<const IndexIVFFlat *> (idx)) {
375
+ uint32_t h = fourcc ("IwFl");
376
+ WRITE1 (h);
377
+ write_ivf_header (ivfl, f);
378
+ write_InvertedLists (ivfl->invlists, f);
379
+ } else if(const IndexIVFScalarQuantizer * ivsc =
380
+ dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
381
+ uint32_t h = fourcc ("IwSq");
382
+ WRITE1 (h);
383
+ write_ivf_header (ivsc, f);
384
+ write_ScalarQuantizer (&ivsc->sq, f);
385
+ WRITE1 (ivsc->code_size);
386
+ WRITE1 (ivsc->by_residual);
387
+ write_InvertedLists (ivsc->invlists, f);
388
+ } else if(const IndexIVFSpectralHash *ivsp =
389
+ dynamic_cast<const IndexIVFSpectralHash *>(idx)) {
390
+ uint32_t h = fourcc ("IwSh");
391
+ WRITE1 (h);
392
+ write_ivf_header (ivsp, f);
393
+ write_VectorTransform (ivsp->vt, f);
394
+ WRITE1 (ivsp->nbit);
395
+ WRITE1 (ivsp->period);
396
+ WRITE1 (ivsp->threshold_type);
397
+ WRITEVECTOR (ivsp->trained);
398
+ write_InvertedLists (ivsp->invlists, f);
399
+ } else if(const IndexIVFPQ * ivpq =
400
+ dynamic_cast<const IndexIVFPQ *> (idx)) {
401
+ const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
402
+
403
+ uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ");
404
+ WRITE1 (h);
405
+ write_ivf_header (ivpq, f);
406
+ WRITE1 (ivpq->by_residual);
407
+ WRITE1 (ivpq->code_size);
408
+ write_ProductQuantizer (&ivpq->pq, f);
409
+ write_InvertedLists (ivpq->invlists, f);
410
+ if (ivfpqr) {
411
+ write_ProductQuantizer (&ivfpqr->refine_pq, f);
412
+ WRITEVECTOR (ivfpqr->refine_codes);
413
+ WRITE1 (ivfpqr->k_factor);
414
+ }
415
+
416
+ } else if(const IndexPreTransform * ixpt =
417
+ dynamic_cast<const IndexPreTransform *> (idx)) {
418
+ uint32_t h = fourcc ("IxPT");
419
+ WRITE1 (h);
420
+ write_index_header (ixpt, f);
421
+ int nt = ixpt->chain.size();
422
+ WRITE1 (nt);
423
+ for (int i = 0; i < nt; i++)
424
+ write_VectorTransform (ixpt->chain[i], f);
425
+ write_index (ixpt->index, f);
426
+ } else if(const MultiIndexQuantizer * imiq =
427
+ dynamic_cast<const MultiIndexQuantizer *> (idx)) {
428
+ uint32_t h = fourcc ("Imiq");
429
+ WRITE1 (h);
430
+ write_index_header (imiq, f);
431
+ write_ProductQuantizer (&imiq->pq, f);
432
+ } else if(const IndexRefineFlat * idxrf =
433
+ dynamic_cast<const IndexRefineFlat *> (idx)) {
434
+ uint32_t h = fourcc ("IxRF");
435
+ WRITE1 (h);
436
+ write_index_header (idxrf, f);
437
+ write_index (idxrf->base_index, f);
438
+ write_index (&idxrf->refine_index, f);
439
+ WRITE1 (idxrf->k_factor);
440
+ } else if(const IndexIDMap * idxmap =
441
+ dynamic_cast<const IndexIDMap *> (idx)) {
442
+ uint32_t h =
443
+ dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
444
+ fourcc ("IxMp");
445
+ // no need to store additional info for IndexIDMap2
446
+ WRITE1 (h);
447
+ write_index_header (idxmap, f);
448
+ write_index (idxmap->index, f);
449
+ WRITEVECTOR (idxmap->id_map);
450
+ } else if(const IndexHNSW * idxhnsw =
451
+ dynamic_cast<const IndexHNSW *> (idx)) {
452
+ uint32_t h =
453
+ dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf") :
454
+ dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp") :
455
+ dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs") :
456
+ dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2") :
457
+ 0;
458
+ FAISS_THROW_IF_NOT (h != 0);
459
+ WRITE1 (h);
460
+ write_index_header (idxhnsw, f);
461
+ write_HNSW (&idxhnsw->hnsw, f);
462
+ write_index (idxhnsw->storage, f);
463
+ } else {
464
+ FAISS_THROW_MSG ("don't know how to serialize this type of index");
465
+ }
466
+ }
467
+
468
+ void write_index (const Index *idx, FILE *f) {
469
+ FileIOWriter writer(f);
470
+ write_index (idx, &writer);
471
+ }
472
+
473
+ void write_index (const Index *idx, const char *fname) {
474
+ FileIOWriter writer(fname);
475
+ write_index (idx, &writer);
476
+ }
477
+
478
+ void write_VectorTransform (const VectorTransform *vt, const char *fname) {
479
+ FileIOWriter writer(fname);
480
+ write_VectorTransform (vt, &writer);
481
+ }
482
+
483
+
484
+ /*************************************************************
485
+ * Write binary indexes
486
+ **************************************************************/
487
+
488
+
489
+ static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) {
490
+ WRITE1 (idx->d);
491
+ WRITE1 (idx->code_size);
492
+ WRITE1 (idx->ntotal);
493
+ WRITE1 (idx->is_trained);
494
+ WRITE1 (idx->metric_type);
495
+ }
496
+
497
+ static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) {
498
+ write_index_binary_header (ivf, f);
499
+ WRITE1 (ivf->nlist);
500
+ WRITE1 (ivf->nprobe);
501
+ write_index_binary (ivf->quantizer, f);
502
+ WRITE1 (ivf->maintain_direct_map);
503
+ WRITEVECTOR (ivf->direct_map);
504
+ }
505
+
506
+ void write_index_binary (const IndexBinary *idx, IOWriter *f) {
507
+ if (const IndexBinaryFlat *idxf =
508
+ dynamic_cast<const IndexBinaryFlat *> (idx)) {
509
+ uint32_t h = fourcc ("IBxF");
510
+ WRITE1 (h);
511
+ write_index_binary_header (idx, f);
512
+ WRITEVECTOR (idxf->xb);
513
+ } else if (const IndexBinaryIVF *ivf =
514
+ dynamic_cast<const IndexBinaryIVF *> (idx)) {
515
+ uint32_t h = fourcc ("IBwF");
516
+ WRITE1 (h);
517
+ write_binary_ivf_header (ivf, f);
518
+ write_InvertedLists (ivf->invlists, f);
519
+ } else if(const IndexBinaryFromFloat * idxff =
520
+ dynamic_cast<const IndexBinaryFromFloat *> (idx)) {
521
+ uint32_t h = fourcc ("IBFf");
522
+ WRITE1 (h);
523
+ write_index_binary_header (idxff, f);
524
+ write_index (idxff->index, f);
525
+ } else if (const IndexBinaryHNSW *idxhnsw =
526
+ dynamic_cast<const IndexBinaryHNSW *> (idx)) {
527
+ uint32_t h = fourcc ("IBHf");
528
+ WRITE1 (h);
529
+ write_index_binary_header (idxhnsw, f);
530
+ write_HNSW (&idxhnsw->hnsw, f);
531
+ write_index_binary (idxhnsw->storage, f);
532
+ } else if(const IndexBinaryIDMap * idxmap =
533
+ dynamic_cast<const IndexBinaryIDMap *> (idx)) {
534
+ uint32_t h =
535
+ dynamic_cast<const IndexBinaryIDMap2 *> (idx) ? fourcc ("IBM2") :
536
+ fourcc ("IBMp");
537
+ // no need to store additional info for IndexIDMap2
538
+ WRITE1 (h);
539
+ write_index_binary_header (idxmap, f);
540
+ write_index_binary (idxmap->index, f);
541
+ WRITEVECTOR (idxmap->id_map);
542
+ } else {
543
+ FAISS_THROW_MSG ("don't know how to serialize this type of index");
544
+ }
545
+ }
546
+
547
+ void write_index_binary (const IndexBinary *idx, FILE *f) {
548
+ FileIOWriter writer(f);
549
+ write_index_binary(idx, &writer);
550
+ }
551
+
552
+ void write_index_binary (const IndexBinary *idx, const char *fname) {
553
+ FileIOWriter writer(fname);
554
+ write_index_binary (idx, &writer);
555
+ }
556
+
557
+
558
+ } // namespace faiss