faiss 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  5. data/vendor/faiss/faiss/AutoTune.h +55 -56
  6. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  7. data/vendor/faiss/faiss/Clustering.h +88 -35
  8. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  9. data/vendor/faiss/faiss/IVFlib.h +48 -51
  10. data/vendor/faiss/faiss/Index.cpp +85 -103
  11. data/vendor/faiss/faiss/Index.h +54 -48
  12. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  13. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  14. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  15. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  16. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  17. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  18. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  19. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  20. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  21. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  22. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  23. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  24. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  25. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  26. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  27. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  29. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  30. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  31. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  32. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  33. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  34. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  35. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  38. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  39. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  42. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  43. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  44. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  45. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  46. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  47. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  48. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  49. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  50. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  51. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  52. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  53. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  54. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  55. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  56. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  57. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  58. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  59. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  60. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  61. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  62. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  63. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  64. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  65. data/vendor/faiss/faiss/IndexShards.h +85 -73
  66. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  67. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  68. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  69. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  70. data/vendor/faiss/faiss/MetricType.h +7 -7
  71. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  72. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  73. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  74. data/vendor/faiss/faiss/clone_index.h +4 -9
  75. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  76. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  77. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  78. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  79. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  80. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  81. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  82. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  83. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  84. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  85. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  86. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  87. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  88. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  89. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  90. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  91. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  92. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  93. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  94. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  95. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  96. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  97. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  98. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  99. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  100. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  101. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  102. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  103. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  104. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  105. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  106. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  107. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  108. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  109. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  110. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  111. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  112. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  113. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  114. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  115. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  116. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  117. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  118. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  119. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  124. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  125. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  126. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  127. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  128. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  131. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  134. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  135. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  136. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  137. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  138. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  139. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  141. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  142. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  144. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  145. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  146. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  147. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  148. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  149. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  150. data/vendor/faiss/faiss/impl/io.h +31 -41
  151. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  152. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  153. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  154. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  155. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  156. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  157. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  158. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  159. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  160. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  161. data/vendor/faiss/faiss/index_factory.h +6 -7
  162. data/vendor/faiss/faiss/index_io.h +23 -26
  163. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  164. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  165. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  166. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  167. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  168. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  169. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  170. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  171. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  172. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  173. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  174. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  175. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  176. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  177. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  178. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  179. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  180. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  181. data/vendor/faiss/faiss/utils/distances.h +133 -118
  182. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  183. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  184. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  185. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  186. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  187. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  188. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  189. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  190. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  191. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  192. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  193. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  194. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  195. data/vendor/faiss/faiss/utils/random.h +13 -16
  196. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  197. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  198. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  199. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  200. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  201. data/vendor/faiss/faiss/utils/utils.h +53 -48
  202. metadata +20 -2
@@ -19,62 +19,68 @@ namespace faiss {
19
19
  /// The interface to this class itself is not thread safe
20
20
  template <typename IndexT>
21
21
  class ThreadedIndex : public IndexT {
22
- public:
23
- explicit ThreadedIndex(bool threaded);
24
- explicit ThreadedIndex(int d, bool threaded);
25
-
26
- ~ThreadedIndex() override;
27
-
28
- /// override an index that is managed by ourselves.
29
- /// WARNING: once an index is added, it becomes unsafe to touch it from any
30
- /// other thread than that on which is managing it, until we are shut
31
- /// down. Use runOnIndex to perform work on it instead.
32
- void addIndex(IndexT* index);
33
-
34
- /// Remove an index that is managed by ourselves.
35
- /// This will flush all pending work on that index, and then shut
36
- /// down its managing thread, and will remove the index.
37
- void removeIndex(IndexT* index);
38
-
39
- /// Run a function on all indices, in the thread that the index is
40
- /// managed in.
41
- /// Function arguments are (index in collection, index pointer)
42
- void runOnIndex(std::function<void(int, IndexT*)> f);
43
- void runOnIndex(std::function<void(int, const IndexT*)> f) const;
44
-
45
- /// faiss::Index API
46
- /// All indices receive the same call
47
- void reset() override;
48
-
49
- /// Returns the number of sub-indices
50
- int count() const { return indices_.size(); }
51
-
52
- /// Returns the i-th sub-index
53
- IndexT* at(int i) { return indices_[i].first; }
54
-
55
- /// Returns the i-th sub-index (const version)
56
- const IndexT* at(int i) const { return indices_[i].first; }
57
-
58
- /// Whether or not we are responsible for deleting our contained indices
59
- bool own_fields;
60
-
61
- protected:
62
- /// Called just after an index is added
63
- virtual void onAfterAddIndex(IndexT* index);
64
-
65
- /// Called just after an index is removed
66
- virtual void onAfterRemoveIndex(IndexT* index);
67
-
68
- protected:
69
- static void waitAndHandleFutures(std::vector<std::future<bool>>& v);
70
-
71
- /// Collection of Index instances, with their managing worker thread if any
72
- std::vector<std::pair<IndexT*, std::unique_ptr<WorkerThread>>> indices_;
73
-
74
- /// Is this index multi-threaded?
75
- bool isThreaded_;
22
+ public:
23
+ explicit ThreadedIndex(bool threaded);
24
+ explicit ThreadedIndex(int d, bool threaded);
25
+
26
+ ~ThreadedIndex() override;
27
+
28
+ /// override an index that is managed by ourselves.
29
+ /// WARNING: once an index is added, it becomes unsafe to touch it from any
30
+ /// other thread than that on which is managing it, until we are shut
31
+ /// down. Use runOnIndex to perform work on it instead.
32
+ void addIndex(IndexT* index);
33
+
34
+ /// Remove an index that is managed by ourselves.
35
+ /// This will flush all pending work on that index, and then shut
36
+ /// down its managing thread, and will remove the index.
37
+ void removeIndex(IndexT* index);
38
+
39
+ /// Run a function on all indices, in the thread that the index is
40
+ /// managed in.
41
+ /// Function arguments are (index in collection, index pointer)
42
+ void runOnIndex(std::function<void(int, IndexT*)> f);
43
+ void runOnIndex(std::function<void(int, const IndexT*)> f) const;
44
+
45
+ /// faiss::Index API
46
+ /// All indices receive the same call
47
+ void reset() override;
48
+
49
+ /// Returns the number of sub-indices
50
+ int count() const {
51
+ return indices_.size();
52
+ }
53
+
54
+ /// Returns the i-th sub-index
55
+ IndexT* at(int i) {
56
+ return indices_[i].first;
57
+ }
58
+
59
+ /// Returns the i-th sub-index (const version)
60
+ const IndexT* at(int i) const {
61
+ return indices_[i].first;
62
+ }
63
+
64
+ /// Whether or not we are responsible for deleting our contained indices
65
+ bool own_fields;
66
+
67
+ protected:
68
+ /// Called just after an index is added
69
+ virtual void onAfterAddIndex(IndexT* index);
70
+
71
+ /// Called just after an index is removed
72
+ virtual void onAfterRemoveIndex(IndexT* index);
73
+
74
+ protected:
75
+ static void waitAndHandleFutures(std::vector<std::future<bool>>& v);
76
+
77
+ /// Collection of Index instances, with their managing worker thread if any
78
+ std::vector<std::pair<IndexT*, std::unique_ptr<WorkerThread>>> indices_;
79
+
80
+ /// Is this index multi-threaded?
81
+ bool isThreaded_;
76
82
  };
77
83
 
78
- } // namespace
84
+ } // namespace faiss
79
85
 
80
86
  #include <faiss/impl/ThreadedIndex-inl.h>
@@ -9,11 +9,13 @@
9
9
 
10
10
  #include <faiss/index_io.h>
11
11
 
12
+ #include <faiss/impl/io_macros.h>
13
+
12
14
  #include <cstdio>
13
15
  #include <cstdlib>
14
16
 
15
- #include <sys/types.h>
16
17
  #include <sys/stat.h>
18
+ #include <sys/types.h>
17
19
 
18
20
  #include <faiss/impl/FaissAssert.h>
19
21
  #include <faiss/impl/io.h>
@@ -22,343 +24,383 @@
22
24
 
23
25
  #include <faiss/invlists/InvertedListsIOHook.h>
24
26
 
27
+ #include <faiss/Index2Layer.h>
25
28
  #include <faiss/IndexFlat.h>
26
- #include <faiss/VectorTransform.h>
27
- #include <faiss/IndexPreTransform.h>
28
- #include <faiss/IndexLSH.h>
29
- #include <faiss/IndexPQ.h>
29
+ #include <faiss/IndexHNSW.h>
30
30
  #include <faiss/IndexIVF.h>
31
+ #include <faiss/IndexIVFFlat.h>
31
32
  #include <faiss/IndexIVFPQ.h>
33
+ #include <faiss/IndexIVFPQFastScan.h>
32
34
  #include <faiss/IndexIVFPQR.h>
33
- #include <faiss/Index2Layer.h>
34
- #include <faiss/IndexIVFFlat.h>
35
35
  #include <faiss/IndexIVFSpectralHash.h>
36
- #include <faiss/MetaIndexes.h>
37
- #include <faiss/IndexScalarQuantizer.h>
38
- #include <faiss/IndexHNSW.h>
36
+ #include <faiss/IndexLSH.h>
39
37
  #include <faiss/IndexLattice.h>
38
+ #include <faiss/IndexNSG.h>
39
+ #include <faiss/IndexPQ.h>
40
40
  #include <faiss/IndexPQFastScan.h>
41
- #include <faiss/IndexIVFPQFastScan.h>
41
+ #include <faiss/IndexPreTransform.h>
42
42
  #include <faiss/IndexRefine.h>
43
+ #include <faiss/IndexResidual.h>
44
+ #include <faiss/IndexScalarQuantizer.h>
45
+ #include <faiss/MetaIndexes.h>
46
+ #include <faiss/VectorTransform.h>
43
47
 
44
48
  #include <faiss/IndexBinaryFlat.h>
45
49
  #include <faiss/IndexBinaryFromFloat.h>
46
50
  #include <faiss/IndexBinaryHNSW.h>
47
- #include <faiss/IndexBinaryIVF.h>
48
51
  #include <faiss/IndexBinaryHash.h>
52
+ #include <faiss/IndexBinaryIVF.h>
49
53
 
50
54
  namespace faiss {
51
55
 
52
-
53
56
  /*************************************************************
54
57
  * Read
55
58
  **************************************************************/
56
59
 
57
- static void read_index_header (Index *idx, IOReader *f) {
58
- READ1 (idx->d);
59
- READ1 (idx->ntotal);
60
+ static void read_index_header(Index* idx, IOReader* f) {
61
+ READ1(idx->d);
62
+ READ1(idx->ntotal);
60
63
  Index::idx_t dummy;
61
- READ1 (dummy);
62
- READ1 (dummy);
63
- READ1 (idx->is_trained);
64
- READ1 (idx->metric_type);
64
+ READ1(dummy);
65
+ READ1(dummy);
66
+ READ1(idx->is_trained);
67
+ READ1(idx->metric_type);
65
68
  if (idx->metric_type > 1) {
66
- READ1 (idx->metric_arg);
69
+ READ1(idx->metric_arg);
67
70
  }
68
71
  idx->verbose = false;
69
72
  }
70
73
 
71
- VectorTransform* read_VectorTransform (IOReader *f) {
74
+ VectorTransform* read_VectorTransform(IOReader* f) {
72
75
  uint32_t h;
73
- READ1 (h);
74
- VectorTransform *vt = nullptr;
75
-
76
- if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
77
- h == fourcc ("LTra") || h == fourcc ("PcAm") ||
78
- h == fourcc ("Viqm")) {
79
- LinearTransform *lt = nullptr;
80
- if (h == fourcc ("rrot")) {
81
- lt = new RandomRotationMatrix ();
82
- } else if (h == fourcc ("PCAm") ||
83
- h == fourcc ("PcAm")) {
84
- PCAMatrix * pca = new PCAMatrix ();
85
- READ1 (pca->eigen_power);
86
- READ1 (pca->random_rotation);
87
- if (h == fourcc ("PcAm"))
88
- READ1 (pca->balanced_bins);
89
- READVECTOR (pca->mean);
90
- READVECTOR (pca->eigenvalues);
91
- READVECTOR (pca->PCAMat);
76
+ READ1(h);
77
+ VectorTransform* vt = nullptr;
78
+
79
+ if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
80
+ h == fourcc("PcAm") || h == fourcc("Viqm")) {
81
+ LinearTransform* lt = nullptr;
82
+ if (h == fourcc("rrot")) {
83
+ lt = new RandomRotationMatrix();
84
+ } else if (h == fourcc("PCAm") || h == fourcc("PcAm")) {
85
+ PCAMatrix* pca = new PCAMatrix();
86
+ READ1(pca->eigen_power);
87
+ READ1(pca->random_rotation);
88
+ if (h == fourcc("PcAm"))
89
+ READ1(pca->balanced_bins);
90
+ READVECTOR(pca->mean);
91
+ READVECTOR(pca->eigenvalues);
92
+ READVECTOR(pca->PCAMat);
92
93
  lt = pca;
93
- } else if (h == fourcc ("Viqm")) {
94
- ITQMatrix *itqm = new ITQMatrix ();
95
- READ1 (itqm->max_iter);
96
- READ1 (itqm->seed);
94
+ } else if (h == fourcc("Viqm")) {
95
+ ITQMatrix* itqm = new ITQMatrix();
96
+ READ1(itqm->max_iter);
97
+ READ1(itqm->seed);
97
98
  lt = itqm;
98
- } else if (h == fourcc ("LTra")) {
99
- lt = new LinearTransform ();
99
+ } else if (h == fourcc("LTra")) {
100
+ lt = new LinearTransform();
100
101
  }
101
- READ1 (lt->have_bias);
102
- READVECTOR (lt->A);
103
- READVECTOR (lt->b);
104
- FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
105
- FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
102
+ READ1(lt->have_bias);
103
+ READVECTOR(lt->A);
104
+ READVECTOR(lt->b);
105
+ FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out);
106
+ FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out);
106
107
  lt->set_is_orthonormal();
107
108
  vt = lt;
108
- } else if (h == fourcc ("RmDT")) {
109
- RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
110
- READVECTOR (rdt->map);
109
+ } else if (h == fourcc("RmDT")) {
110
+ RemapDimensionsTransform* rdt = new RemapDimensionsTransform();
111
+ READVECTOR(rdt->map);
111
112
  vt = rdt;
112
- } else if (h == fourcc ("VNrm")) {
113
- NormalizationTransform *nt = new NormalizationTransform ();
114
- READ1 (nt->norm);
113
+ } else if (h == fourcc("VNrm")) {
114
+ NormalizationTransform* nt = new NormalizationTransform();
115
+ READ1(nt->norm);
115
116
  vt = nt;
116
- } else if (h == fourcc ("VCnt")) {
117
- CenteringTransform *ct = new CenteringTransform ();
118
- READVECTOR (ct->mean);
117
+ } else if (h == fourcc("VCnt")) {
118
+ CenteringTransform* ct = new CenteringTransform();
119
+ READVECTOR(ct->mean);
119
120
  vt = ct;
120
- } else if (h == fourcc ("Viqt")) {
121
- ITQTransform *itqt = new ITQTransform ();
121
+ } else if (h == fourcc("Viqt")) {
122
+ ITQTransform* itqt = new ITQTransform();
122
123
 
123
- READVECTOR (itqt->mean);
124
- READ1 (itqt->do_pca);
124
+ READVECTOR(itqt->mean);
125
+ READ1(itqt->do_pca);
125
126
  {
126
- ITQMatrix *itqm = dynamic_cast<ITQMatrix*>
127
- (read_VectorTransform (f));
127
+ ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f));
128
128
  FAISS_THROW_IF_NOT(itqm);
129
129
  itqt->itq = *itqm;
130
130
  delete itqm;
131
131
  }
132
132
  {
133
- LinearTransform *pi = dynamic_cast<LinearTransform*>
134
- (read_VectorTransform (f));
135
- FAISS_THROW_IF_NOT (pi);
133
+ LinearTransform* pi =
134
+ dynamic_cast<LinearTransform*>(read_VectorTransform(f));
135
+ FAISS_THROW_IF_NOT(pi);
136
136
  itqt->pca_then_itq = *pi;
137
137
  delete pi;
138
138
  }
139
139
  vt = itqt;
140
140
  } else {
141
141
  FAISS_THROW_FMT(
142
- "fourcc %ud (\"%s\") not recognized",
143
- h, fourcc_inv_printable(h).c_str()
144
- );
142
+ "fourcc %ud (\"%s\") not recognized",
143
+ h,
144
+ fourcc_inv_printable(h).c_str());
145
145
  }
146
- READ1 (vt->d_in);
147
- READ1 (vt->d_out);
148
- READ1 (vt->is_trained);
146
+ READ1(vt->d_in);
147
+ READ1(vt->d_out);
148
+ READ1(vt->is_trained);
149
149
  return vt;
150
150
  }
151
151
 
152
-
153
- static void read_ArrayInvertedLists_sizes (
154
- IOReader *f, std::vector<size_t> & sizes)
155
- {
152
+ static void read_ArrayInvertedLists_sizes(
153
+ IOReader* f,
154
+ std::vector<size_t>& sizes) {
156
155
  uint32_t list_type;
157
156
  READ1(list_type);
158
157
  if (list_type == fourcc("full")) {
159
158
  size_t os = sizes.size();
160
- READVECTOR (sizes);
161
- FAISS_THROW_IF_NOT (os == sizes.size());
159
+ READVECTOR(sizes);
160
+ FAISS_THROW_IF_NOT(os == sizes.size());
162
161
  } else if (list_type == fourcc("sprs")) {
163
162
  std::vector<size_t> idsizes;
164
- READVECTOR (idsizes);
163
+ READVECTOR(idsizes);
165
164
  for (size_t j = 0; j < idsizes.size(); j += 2) {
166
- FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
165
+ FAISS_THROW_IF_NOT(idsizes[j] < sizes.size());
167
166
  sizes[idsizes[j]] = idsizes[j + 1];
168
167
  }
169
168
  } else {
170
169
  FAISS_THROW_FMT(
171
- "list_type %ud (\"%s\") not recognized",
172
- list_type, fourcc_inv_printable(list_type).c_str()
173
- );
170
+ "list_type %ud (\"%s\") not recognized",
171
+ list_type,
172
+ fourcc_inv_printable(list_type).c_str());
174
173
  }
175
174
  }
176
175
 
177
- InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
176
+ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
178
177
  uint32_t h;
179
- READ1 (h);
180
- if (h == fourcc ("il00")) {
181
- fprintf(stderr, "read_InvertedLists:"
178
+ READ1(h);
179
+ if (h == fourcc("il00")) {
180
+ fprintf(stderr,
181
+ "read_InvertedLists:"
182
182
  " WARN! inverted lists not stored with IVF object\n");
183
183
  return nullptr;
184
- } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
185
- auto ails = new ArrayInvertedLists (0, 0);
186
- READ1 (ails->nlist);
187
- READ1 (ails->code_size);
188
- ails->ids.resize (ails->nlist);
189
- ails->codes.resize (ails->nlist);
190
- std::vector<size_t> sizes (ails->nlist);
191
- read_ArrayInvertedLists_sizes (f, sizes);
184
+ } else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
185
+ auto ails = new ArrayInvertedLists(0, 0);
186
+ READ1(ails->nlist);
187
+ READ1(ails->code_size);
188
+ ails->ids.resize(ails->nlist);
189
+ ails->codes.resize(ails->nlist);
190
+ std::vector<size_t> sizes(ails->nlist);
191
+ read_ArrayInvertedLists_sizes(f, sizes);
192
192
  for (size_t i = 0; i < ails->nlist; i++) {
193
- ails->ids[i].resize (sizes[i]);
194
- ails->codes[i].resize (sizes[i] * ails->code_size);
193
+ ails->ids[i].resize(sizes[i]);
194
+ ails->codes[i].resize(sizes[i] * ails->code_size);
195
195
  }
196
196
  for (size_t i = 0; i < ails->nlist; i++) {
197
197
  size_t n = ails->ids[i].size();
198
198
  if (n > 0) {
199
- READANDCHECK (ails->codes[i].data(), n * ails->code_size);
200
- READANDCHECK (ails->ids[i].data(), n);
199
+ READANDCHECK(ails->codes[i].data(), n * ails->code_size);
200
+ READANDCHECK(ails->ids[i].data(), n);
201
201
  }
202
202
  }
203
203
  return ails;
204
204
 
205
- } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
206
- // code is always ilxx where xx is specific to the type of invlists we want
207
- // so we get the 16 high bits from the io_flag and the 16 low bits as "il"
205
+ } else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
206
+ // code is always ilxx where xx is specific to the type of invlists we
207
+ // want so we get the 16 high bits from the io_flag and the 16 low bits
208
+ // as "il"
208
209
  int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
209
210
  size_t nlist, code_size;
210
- READ1 (nlist);
211
- READ1 (code_size);
212
- std::vector<size_t> sizes (nlist);
213
- read_ArrayInvertedLists_sizes (f, sizes);
211
+ READ1(nlist);
212
+ READ1(code_size);
213
+ std::vector<size_t> sizes(nlist);
214
+ read_ArrayInvertedLists_sizes(f, sizes);
214
215
  return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
215
216
  f, io_flags, nlist, code_size, sizes);
216
217
  } else {
217
218
  return InvertedListsIOHook::lookup(h)->read(f, io_flags);
218
219
  }
219
-
220
220
  }
221
221
 
222
-
223
- static void read_InvertedLists (
224
- IndexIVF *ivf, IOReader *f, int io_flags) {
225
- InvertedLists *ils = read_InvertedLists (f, io_flags);
222
+ static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
223
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
226
224
  if (ils) {
227
- FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
228
- FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
229
- ils->code_size == ivf->code_size);
225
+ FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
226
+ FAISS_THROW_IF_NOT(
227
+ ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
228
+ ils->code_size == ivf->code_size);
230
229
  }
231
230
  ivf->invlists = ils;
232
231
  ivf->own_invlists = true;
233
232
  }
234
233
 
235
- static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
236
- READ1 (pq->d);
237
- READ1 (pq->M);
238
- READ1 (pq->nbits);
239
- pq->set_derived_values ();
240
- READVECTOR (pq->centroids);
234
+ static void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
235
+ READ1(pq->d);
236
+ READ1(pq->M);
237
+ READ1(pq->nbits);
238
+ pq->set_derived_values();
239
+ READVECTOR(pq->centroids);
240
+ }
241
+
242
+ static void read_ResidualQuantizer(ResidualQuantizer* rq, IOReader* f) {
243
+ READ1(rq->d);
244
+ READ1(rq->M);
245
+ READVECTOR(rq->nbits);
246
+ rq->set_derived_values();
247
+ READ1(rq->is_trained);
248
+ READ1(rq->train_type);
249
+ READ1(rq->max_beam_size);
250
+ READVECTOR(rq->codebooks);
241
251
  }
242
252
 
243
- static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
244
- READ1 (ivsc->qtype);
245
- READ1 (ivsc->rangestat);
246
- READ1 (ivsc->rangestat_arg);
247
- READ1 (ivsc->d);
248
- READ1 (ivsc->code_size);
249
- READVECTOR (ivsc->trained);
250
- ivsc->set_derived_sizes ();
253
+ static void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
254
+ READ1(ivsc->qtype);
255
+ READ1(ivsc->rangestat);
256
+ READ1(ivsc->rangestat_arg);
257
+ READ1(ivsc->d);
258
+ READ1(ivsc->code_size);
259
+ READVECTOR(ivsc->trained);
260
+ ivsc->set_derived_sizes();
251
261
  }
252
262
 
263
+ static void read_HNSW(HNSW* hnsw, IOReader* f) {
264
+ READVECTOR(hnsw->assign_probas);
265
+ READVECTOR(hnsw->cum_nneighbor_per_level);
266
+ READVECTOR(hnsw->levels);
267
+ READVECTOR(hnsw->offsets);
268
+ READVECTOR(hnsw->neighbors);
269
+
270
+ READ1(hnsw->entry_point);
271
+ READ1(hnsw->max_level);
272
+ READ1(hnsw->efConstruction);
273
+ READ1(hnsw->efSearch);
274
+ READ1(hnsw->upper_beam);
275
+ }
253
276
 
254
- static void read_HNSW (HNSW *hnsw, IOReader *f) {
255
- READVECTOR (hnsw->assign_probas);
256
- READVECTOR (hnsw->cum_nneighbor_per_level);
257
- READVECTOR (hnsw->levels);
258
- READVECTOR (hnsw->offsets);
259
- READVECTOR (hnsw->neighbors);
277
+ static void read_NSG(NSG* nsg, IOReader* f) {
278
+ READ1(nsg->ntotal);
279
+ READ1(nsg->R);
280
+ READ1(nsg->L);
281
+ READ1(nsg->C);
282
+ READ1(nsg->search_L);
283
+ READ1(nsg->enterpoint);
284
+ READ1(nsg->is_built);
285
+
286
+ if (!nsg->is_built) {
287
+ return;
288
+ }
260
289
 
261
- READ1 (hnsw->entry_point);
262
- READ1 (hnsw->max_level);
263
- READ1 (hnsw->efConstruction);
264
- READ1 (hnsw->efSearch);
265
- READ1 (hnsw->upper_beam);
290
+ constexpr int EMPTY_ID = -1;
291
+ int N = nsg->ntotal;
292
+ int R = nsg->R;
293
+ auto& graph = nsg->final_graph;
294
+ graph = std::make_shared<nsg::Graph<int>>(N, R);
295
+ std::fill_n(graph->data, N * R, EMPTY_ID);
296
+
297
+ int size = 0;
298
+
299
+ for (int i = 0; i < N; i++) {
300
+ for (int j = 0; j < R + 1; j++) {
301
+ int id;
302
+ READ1(id);
303
+ if (id != EMPTY_ID) {
304
+ graph->at(i, j) = id;
305
+ size += 1;
306
+ } else {
307
+ break;
308
+ }
309
+ }
310
+ }
266
311
  }
267
312
 
268
- ProductQuantizer * read_ProductQuantizer (const char*fname) {
313
+ ProductQuantizer* read_ProductQuantizer(const char* fname) {
269
314
  FileIOReader reader(fname);
270
315
  return read_ProductQuantizer(&reader);
271
316
  }
272
317
 
273
- ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
274
- ProductQuantizer *pq = new ProductQuantizer();
275
- ScopeDeleter1<ProductQuantizer> del (pq);
318
+ ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
319
+ ProductQuantizer* pq = new ProductQuantizer();
320
+ ScopeDeleter1<ProductQuantizer> del(pq);
276
321
 
277
- read_ProductQuantizer(pq, reader);
278
- del.release ();
279
- return pq;
322
+ read_ProductQuantizer(pq, reader);
323
+ del.release();
324
+ return pq;
280
325
  }
281
326
 
282
- static void read_direct_map (DirectMap *dm, IOReader *f) {
327
+ static void read_direct_map(DirectMap* dm, IOReader* f) {
283
328
  char maintain_direct_map;
284
- READ1 (maintain_direct_map);
329
+ READ1(maintain_direct_map);
285
330
  dm->type = (DirectMap::Type)maintain_direct_map;
286
- READVECTOR (dm->array);
331
+ READVECTOR(dm->array);
287
332
  if (dm->type == DirectMap::Hashtable) {
288
333
  using idx_t = Index::idx_t;
289
334
  std::vector<std::pair<idx_t, idx_t>> v;
290
- READVECTOR (v);
291
- std::unordered_map<idx_t, idx_t> & map = dm->hashtable;
292
- map.reserve (v.size());
293
- for (auto it: v) {
294
- map [it.first] = it.second;
335
+ READVECTOR(v);
336
+ std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
337
+ map.reserve(v.size());
338
+ for (auto it : v) {
339
+ map[it.first] = it.second;
295
340
  }
296
341
  }
297
-
298
342
  }
299
343
 
300
-
301
- static void read_ivf_header (
302
- IndexIVF *ivf, IOReader *f,
303
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
304
- {
305
- read_index_header (ivf, f);
306
- READ1 (ivf->nlist);
307
- READ1 (ivf->nprobe);
308
- ivf->quantizer = read_index (f);
344
+ static void read_ivf_header(
345
+ IndexIVF* ivf,
346
+ IOReader* f,
347
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
348
+ read_index_header(ivf, f);
349
+ READ1(ivf->nlist);
350
+ READ1(ivf->nprobe);
351
+ ivf->quantizer = read_index(f);
309
352
  ivf->own_fields = true;
310
353
  if (ids) { // used in legacy "Iv" formats
311
- ids->resize (ivf->nlist);
354
+ ids->resize(ivf->nlist);
312
355
  for (size_t i = 0; i < ivf->nlist; i++)
313
- READVECTOR ((*ids)[i]);
356
+ READVECTOR((*ids)[i]);
314
357
  }
315
- read_direct_map (&ivf->direct_map, f);
358
+ read_direct_map(&ivf->direct_map, f);
316
359
  }
317
360
 
318
361
  // used for legacy formats
319
- static ArrayInvertedLists *set_array_invlist(
320
- IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
321
- {
322
- ArrayInvertedLists *ail = new ArrayInvertedLists (
323
- ivf->nlist, ivf->code_size);
324
- std::swap (ail->ids, ids);
362
+ static ArrayInvertedLists* set_array_invlist(
363
+ IndexIVF* ivf,
364
+ std::vector<std::vector<Index::idx_t>>& ids) {
365
+ ArrayInvertedLists* ail =
366
+ new ArrayInvertedLists(ivf->nlist, ivf->code_size);
367
+ std::swap(ail->ids, ids);
325
368
  ivf->invlists = ail;
326
369
  ivf->own_invlists = true;
327
370
  return ail;
328
371
  }
329
372
 
330
- static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
331
- {
332
- bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
373
+ static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
374
+ bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ");
333
375
 
334
- IndexIVFPQR *ivfpqr =
335
- h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
336
- new IndexIVFPQR () : nullptr;
337
- IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
376
+ IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR")
377
+ ? new IndexIVFPQR()
378
+ : nullptr;
379
+ IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
338
380
 
339
- std::vector<std::vector<Index::idx_t> > ids;
340
- read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
341
- READ1 (ivpq->by_residual);
342
- READ1 (ivpq->code_size);
343
- read_ProductQuantizer (&ivpq->pq, f);
381
+ std::vector<std::vector<Index::idx_t>> ids;
382
+ read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
383
+ READ1(ivpq->by_residual);
384
+ READ1(ivpq->code_size);
385
+ read_ProductQuantizer(&ivpq->pq, f);
344
386
 
345
387
  if (legacy) {
346
- ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
388
+ ArrayInvertedLists* ail = set_array_invlist(ivpq, ids);
347
389
  for (size_t i = 0; i < ail->nlist; i++)
348
- READVECTOR (ail->codes[i]);
390
+ READVECTOR(ail->codes[i]);
349
391
  } else {
350
- read_InvertedLists (ivpq, f, io_flags);
392
+ read_InvertedLists(ivpq, f, io_flags);
351
393
  }
352
394
 
353
395
  if (ivpq->is_trained) {
354
396
  // precomputed table not stored. It is cheaper to recompute it
355
397
  ivpq->use_precomputed_table = 0;
356
398
  if (ivpq->by_residual)
357
- ivpq->precompute_table ();
399
+ ivpq->precompute_table();
358
400
  if (ivfpqr) {
359
- read_ProductQuantizer (&ivfpqr->refine_pq, f);
360
- READVECTOR (ivfpqr->refine_codes);
361
- READ1 (ivfpqr->k_factor);
401
+ read_ProductQuantizer(&ivfpqr->refine_pq, f);
402
+ READVECTOR(ivfpqr->refine_codes);
403
+ READ1(ivfpqr->k_factor);
362
404
  }
363
405
  }
364
406
  return ivpq;
@@ -366,200 +408,216 @@ static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
366
408
 
367
409
  int read_old_fmt_hack = 0;
368
410
 
369
- Index *read_index (IOReader *f, int io_flags) {
370
- Index * idx = nullptr;
411
+ Index* read_index(IOReader* f, int io_flags) {
412
+ Index* idx = nullptr;
371
413
  uint32_t h;
372
- READ1 (h);
373
- if (h == fourcc ("IxFI") || h == fourcc ("IxF2") || h == fourcc("IxFl")) {
374
- IndexFlat *idxf;
375
- if (h == fourcc ("IxFI")) {
376
- idxf = new IndexFlatIP ();
414
+ READ1(h);
415
+ if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
416
+ IndexFlat* idxf;
417
+ if (h == fourcc("IxFI")) {
418
+ idxf = new IndexFlatIP();
377
419
  } else if (h == fourcc("IxF2")) {
378
- idxf = new IndexFlatL2 ();
420
+ idxf = new IndexFlatL2();
379
421
  } else {
380
- idxf = new IndexFlat ();
422
+ idxf = new IndexFlat();
381
423
  }
382
- read_index_header (idxf, f);
383
- READVECTOR (idxf->xb);
384
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
424
+ read_index_header(idxf, f);
425
+ READVECTOR(idxf->xb);
426
+ FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->d);
385
427
  // leak!
386
428
  idx = idxf;
387
429
  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
388
- IndexLSH * idxl = new IndexLSH ();
389
- read_index_header (idxl, f);
390
- READ1 (idxl->nbits);
391
- READ1 (idxl->rotate_data);
392
- READ1 (idxl->train_thresholds);
393
- READVECTOR (idxl->thresholds);
394
- READ1 (idxl->bytes_per_vec);
430
+ IndexLSH* idxl = new IndexLSH();
431
+ read_index_header(idxl, f);
432
+ READ1(idxl->nbits);
433
+ READ1(idxl->rotate_data);
434
+ READ1(idxl->train_thresholds);
435
+ READVECTOR(idxl->thresholds);
436
+ READ1(idxl->bytes_per_vec);
395
437
  if (h == fourcc("IxHE")) {
396
- FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
397
- "can only read old format IndexLSH with "
398
- "nbits multiple of 64 (got %d)",
399
- (int) idxl->nbits);
438
+ FAISS_THROW_IF_NOT_FMT(
439
+ idxl->nbits % 64 == 0,
440
+ "can only read old format IndexLSH with "
441
+ "nbits multiple of 64 (got %d)",
442
+ (int)idxl->nbits);
400
443
  // leak
401
444
  idxl->bytes_per_vec *= 8;
402
445
  }
403
446
  {
404
- RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
405
- (read_VectorTransform (f));
447
+ RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>(
448
+ read_VectorTransform(f));
406
449
  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
407
450
  idxl->rrot = *rrot;
408
451
  delete rrot;
409
452
  }
410
- READVECTOR (idxl->codes);
411
- FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
412
- idxl->rrot.d_out == idxl->nbits);
413
- FAISS_THROW_IF_NOT (
414
- idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
453
+ READVECTOR(idxl->codes);
454
+ FAISS_THROW_IF_NOT(
455
+ idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
456
+ FAISS_THROW_IF_NOT(
457
+ idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
415
458
  idx = idxl;
416
- } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
417
- h == fourcc ("IxPq")) {
459
+ } else if (
460
+ h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) {
418
461
  // IxPQ and IxPo were merged into the same IndexPQ object
419
- IndexPQ * idxp =new IndexPQ ();
420
- read_index_header (idxp, f);
421
- read_ProductQuantizer (&idxp->pq, f);
422
- READVECTOR (idxp->codes);
423
- if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
424
- READ1 (idxp->search_type);
425
- READ1 (idxp->encode_signs);
426
- READ1 (idxp->polysemous_ht);
462
+ IndexPQ* idxp = new IndexPQ();
463
+ read_index_header(idxp, f);
464
+ read_ProductQuantizer(&idxp->pq, f);
465
+ READVECTOR(idxp->codes);
466
+ if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
467
+ READ1(idxp->search_type);
468
+ READ1(idxp->encode_signs);
469
+ READ1(idxp->polysemous_ht);
427
470
  }
428
471
  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
429
472
  // when they were in fact using L2. Therefore, we force metric type
430
473
  // to L2 when the old format is detected
431
- if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
474
+ if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
432
475
  idxp->metric_type = METRIC_L2;
433
476
  }
434
477
  idx = idxp;
435
- } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
436
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
437
- std::vector<std::vector<Index::idx_t> > ids;
438
- read_ivf_header (ivfl, f, &ids);
478
+ } else if (h == fourcc("IxRQ")) {
479
+ IndexResidual* idxr = new IndexResidual();
480
+ read_index_header(idxr, f);
481
+ read_ResidualQuantizer(&idxr->rq, f);
482
+ READ1(idxr->search_type);
483
+ READ1(idxr->norm_min);
484
+ READ1(idxr->norm_max);
485
+ READ1(idxr->code_size);
486
+ READVECTOR(idxr->codes);
487
+ idx = idxr;
488
+ } else if (h == fourcc("ImRQ")) {
489
+ ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
490
+ read_index_header(idxr, f);
491
+ read_ResidualQuantizer(&idxr->rq, f);
492
+ READ1(idxr->beam_factor);
493
+ idxr->set_beam_factor(idxr->beam_factor);
494
+ idx = idxr;
495
+ } else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
496
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
497
+ std::vector<std::vector<Index::idx_t>> ids;
498
+ read_ivf_header(ivfl, f, &ids);
439
499
  ivfl->code_size = ivfl->d * sizeof(float);
440
- ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
500
+ ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
441
501
 
442
- if (h == fourcc ("IvFL")) {
502
+ if (h == fourcc("IvFL")) {
443
503
  for (size_t i = 0; i < ivfl->nlist; i++) {
444
- READVECTOR (ail->codes[i]);
504
+ READVECTOR(ail->codes[i]);
445
505
  }
446
506
  } else { // old format
447
507
  for (size_t i = 0; i < ivfl->nlist; i++) {
448
508
  std::vector<float> vec;
449
- READVECTOR (vec);
509
+ READVECTOR(vec);
450
510
  ail->codes[i].resize(vec.size() * sizeof(float));
451
- memcpy(ail->codes[i].data(), vec.data(),
452
- ail->codes[i].size());
511
+ memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size());
453
512
  }
454
513
  }
455
514
  idx = ivfl;
456
- } else if (h == fourcc ("IwFd")) {
457
- IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
458
- read_ivf_header (ivfl, f);
515
+ } else if (h == fourcc("IwFd")) {
516
+ IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup();
517
+ read_ivf_header(ivfl, f);
459
518
  ivfl->code_size = ivfl->d * sizeof(float);
460
519
  {
461
520
  std::vector<Index::idx_t> tab;
462
- READVECTOR (tab);
521
+ READVECTOR(tab);
463
522
  for (long i = 0; i < tab.size(); i += 2) {
464
- std::pair<Index::idx_t, Index::idx_t>
465
- pair (tab[i], tab[i + 1]);
466
- ivfl->instances.insert (pair);
523
+ std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
524
+ ivfl->instances.insert(pair);
467
525
  }
468
526
  }
469
- read_InvertedLists (ivfl, f, io_flags);
527
+ read_InvertedLists(ivfl, f, io_flags);
470
528
  idx = ivfl;
471
- } else if (h == fourcc ("IwFl")) {
472
- IndexIVFFlat * ivfl = new IndexIVFFlat ();
473
- read_ivf_header (ivfl, f);
529
+ } else if (h == fourcc("IwFl")) {
530
+ IndexIVFFlat* ivfl = new IndexIVFFlat();
531
+ read_ivf_header(ivfl, f);
474
532
  ivfl->code_size = ivfl->d * sizeof(float);
475
- read_InvertedLists (ivfl, f, io_flags);
533
+ read_InvertedLists(ivfl, f, io_flags);
476
534
  idx = ivfl;
477
- } else if (h == fourcc ("IxSQ")) {
478
- IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
479
- read_index_header (idxs, f);
480
- read_ScalarQuantizer (&idxs->sq, f);
481
- READVECTOR (idxs->codes);
535
+ } else if (h == fourcc("IxSQ")) {
536
+ IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
537
+ read_index_header(idxs, f);
538
+ read_ScalarQuantizer(&idxs->sq, f);
539
+ READVECTOR(idxs->codes);
482
540
  idxs->code_size = idxs->sq.code_size;
483
541
  idx = idxs;
484
- } else if (h == fourcc ("IxLa")) {
542
+ } else if (h == fourcc("IxLa")) {
485
543
  int d, nsq, scale_nbit, r2;
486
- READ1 (d);
487
- READ1 (nsq);
488
- READ1 (scale_nbit);
489
- READ1 (r2);
490
- IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2);
491
- read_index_header (idxl, f);
492
- READVECTOR (idxl->trained);
544
+ READ1(d);
545
+ READ1(nsq);
546
+ READ1(scale_nbit);
547
+ READ1(r2);
548
+ IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2);
549
+ read_index_header(idxl, f);
550
+ READVECTOR(idxl->trained);
493
551
  idx = idxl;
494
- } else if(h == fourcc ("IvSQ")) { // legacy
495
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
496
- std::vector<std::vector<Index::idx_t> > ids;
497
- read_ivf_header (ivsc, f, &ids);
498
- read_ScalarQuantizer (&ivsc->sq, f);
499
- READ1 (ivsc->code_size);
500
- ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
501
- for(int i = 0; i < ivsc->nlist; i++)
502
- READVECTOR (ail->codes[i]);
552
+ } else if (h == fourcc("IvSQ")) { // legacy
553
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
554
+ std::vector<std::vector<Index::idx_t>> ids;
555
+ read_ivf_header(ivsc, f, &ids);
556
+ read_ScalarQuantizer(&ivsc->sq, f);
557
+ READ1(ivsc->code_size);
558
+ ArrayInvertedLists* ail = set_array_invlist(ivsc, ids);
559
+ for (int i = 0; i < ivsc->nlist; i++)
560
+ READVECTOR(ail->codes[i]);
503
561
  idx = ivsc;
504
- } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
505
- IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
506
- read_ivf_header (ivsc, f);
507
- read_ScalarQuantizer (&ivsc->sq, f);
508
- READ1 (ivsc->code_size);
509
- if (h == fourcc ("IwSQ")) {
562
+ } else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) {
563
+ IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
564
+ read_ivf_header(ivsc, f);
565
+ read_ScalarQuantizer(&ivsc->sq, f);
566
+ READ1(ivsc->code_size);
567
+ if (h == fourcc("IwSQ")) {
510
568
  ivsc->by_residual = true;
511
569
  } else {
512
- READ1 (ivsc->by_residual);
570
+ READ1(ivsc->by_residual);
513
571
  }
514
- read_InvertedLists (ivsc, f, io_flags);
572
+ read_InvertedLists(ivsc, f, io_flags);
515
573
  idx = ivsc;
516
- } else if(h == fourcc ("IwSh")) {
517
- IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
518
- read_ivf_header (ivsp, f);
519
- ivsp->vt = read_VectorTransform (f);
574
+ } else if (h == fourcc("IwSh")) {
575
+ IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash();
576
+ read_ivf_header(ivsp, f);
577
+ ivsp->vt = read_VectorTransform(f);
520
578
  ivsp->own_fields = true;
521
- READ1 (ivsp->nbit);
579
+ READ1(ivsp->nbit);
522
580
  // not stored by write_ivf_header
523
581
  ivsp->code_size = (ivsp->nbit + 7) / 8;
524
- READ1 (ivsp->period);
525
- READ1 (ivsp->threshold_type);
526
- READVECTOR (ivsp->trained);
527
- read_InvertedLists (ivsp, f, io_flags);
582
+ READ1(ivsp->period);
583
+ READ1(ivsp->threshold_type);
584
+ READVECTOR(ivsp->trained);
585
+ read_InvertedLists(ivsp, f, io_flags);
528
586
  idx = ivsp;
529
- } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
530
- h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {
531
-
532
- idx = read_ivfpq (f, h, io_flags);
587
+ } else if (
588
+ h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
589
+ h == fourcc("IwQR")) {
590
+ idx = read_ivfpq(f, h, io_flags);
533
591
 
534
- } else if(h == fourcc ("IxPT")) {
535
- IndexPreTransform * ixpt = new IndexPreTransform();
592
+ } else if (h == fourcc("IxPT")) {
593
+ IndexPreTransform* ixpt = new IndexPreTransform();
536
594
  ixpt->own_fields = true;
537
- read_index_header (ixpt, f);
595
+ read_index_header(ixpt, f);
538
596
  int nt;
539
597
  if (read_old_fmt_hack == 2) {
540
598
  nt = 1;
541
599
  } else {
542
- READ1 (nt);
600
+ READ1(nt);
543
601
  }
544
602
  for (int i = 0; i < nt; i++) {
545
- ixpt->chain.push_back (read_VectorTransform (f));
603
+ ixpt->chain.push_back(read_VectorTransform(f));
546
604
  }
547
- ixpt->index = read_index (f, io_flags);
605
+ ixpt->index = read_index(f, io_flags);
548
606
  idx = ixpt;
549
- } else if(h == fourcc ("Imiq")) {
550
- MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
551
- read_index_header (imiq, f);
552
- read_ProductQuantizer (&imiq->pq, f);
607
+ } else if (h == fourcc("Imiq")) {
608
+ MultiIndexQuantizer* imiq = new MultiIndexQuantizer();
609
+ read_index_header(imiq, f);
610
+ read_ProductQuantizer(&imiq->pq, f);
553
611
  idx = imiq;
554
- } else if(h == fourcc ("IxRF")) {
555
- IndexRefine *idxrf = new IndexRefine ();
556
- read_index_header (idxrf, f);
612
+ } else if (h == fourcc("IxRF")) {
613
+ IndexRefine* idxrf = new IndexRefine();
614
+ read_index_header(idxrf, f);
557
615
  idxrf->base_index = read_index(f, io_flags);
558
616
  idxrf->refine_index = read_index(f, io_flags);
559
- READ1 (idxrf->k_factor);
617
+ READ1(idxrf->k_factor);
560
618
  if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
561
619
  // then make a RefineFlat with it
562
- IndexRefine *idxrf_old = idxrf;
620
+ IndexRefine* idxrf_old = idxrf;
563
621
  idxrf = new IndexRefineFlat();
564
622
  *idxrf = *idxrf_old;
565
623
  delete idxrf_old;
@@ -567,248 +625,260 @@ Index *read_index (IOReader *f, int io_flags) {
567
625
  idxrf->own_fields = true;
568
626
  idxrf->own_refine_index = true;
569
627
  idx = idxrf;
570
- } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
571
- bool is_map2 = h == fourcc ("IxM2");
572
- IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
573
- read_index_header (idxmap, f);
574
- idxmap->index = read_index (f, io_flags);
628
+ } else if (h == fourcc("IxMp") || h == fourcc("IxM2")) {
629
+ bool is_map2 = h == fourcc("IxM2");
630
+ IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap();
631
+ read_index_header(idxmap, f);
632
+ idxmap->index = read_index(f, io_flags);
575
633
  idxmap->own_fields = true;
576
- READVECTOR (idxmap->id_map);
634
+ READVECTOR(idxmap->id_map);
577
635
  if (is_map2) {
578
- static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
636
+ static_cast<IndexIDMap2*>(idxmap)->construct_rev_map();
579
637
  }
580
638
  idx = idxmap;
581
- } else if (h == fourcc ("Ix2L")) {
582
- Index2Layer * idxp = new Index2Layer ();
583
- read_index_header (idxp, f);
584
- idxp->q1.quantizer = read_index (f, io_flags);
585
- READ1 (idxp->q1.nlist);
586
- READ1 (idxp->q1.quantizer_trains_alone);
587
- read_ProductQuantizer (&idxp->pq, f);
588
- READ1 (idxp->code_size_1);
589
- READ1 (idxp->code_size_2);
590
- READ1 (idxp->code_size);
591
- READVECTOR (idxp->codes);
639
+ } else if (h == fourcc("Ix2L")) {
640
+ Index2Layer* idxp = new Index2Layer();
641
+ read_index_header(idxp, f);
642
+ idxp->q1.quantizer = read_index(f, io_flags);
643
+ READ1(idxp->q1.nlist);
644
+ READ1(idxp->q1.quantizer_trains_alone);
645
+ read_ProductQuantizer(&idxp->pq, f);
646
+ READ1(idxp->code_size_1);
647
+ READ1(idxp->code_size_2);
648
+ READ1(idxp->code_size);
649
+ READVECTOR(idxp->codes);
592
650
  idx = idxp;
593
- } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
594
- h == fourcc("IHNs") || h == fourcc("IHN2")) {
595
- IndexHNSW *idxhnsw = nullptr;
596
- if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
597
- if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
598
- if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
599
- if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
600
- read_index_header (idxhnsw, f);
601
- read_HNSW (&idxhnsw->hnsw, f);
602
- idxhnsw->storage = read_index (f, io_flags);
651
+ } else if (
652
+ h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
653
+ h == fourcc("IHN2")) {
654
+ IndexHNSW* idxhnsw = nullptr;
655
+ if (h == fourcc("IHNf"))
656
+ idxhnsw = new IndexHNSWFlat();
657
+ if (h == fourcc("IHNp"))
658
+ idxhnsw = new IndexHNSWPQ();
659
+ if (h == fourcc("IHNs"))
660
+ idxhnsw = new IndexHNSWSQ();
661
+ if (h == fourcc("IHN2"))
662
+ idxhnsw = new IndexHNSW2Level();
663
+ read_index_header(idxhnsw, f);
664
+ read_HNSW(&idxhnsw->hnsw, f);
665
+ idxhnsw->storage = read_index(f, io_flags);
603
666
  idxhnsw->own_fields = true;
604
667
  if (h == fourcc("IHNp")) {
605
- dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
668
+ dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
606
669
  }
607
670
  idx = idxhnsw;
608
- } else if(h == fourcc("IPfs")) {
609
- IndexPQFastScan *idxpqfs = new IndexPQFastScan();
610
- read_index_header (idxpqfs, f);
611
- read_ProductQuantizer (&idxpqfs->pq, f);
612
- READ1 (idxpqfs->implem);
613
- READ1 (idxpqfs->bbs);
614
- READ1 (idxpqfs->qbs);
615
- READ1 (idxpqfs->ntotal2);
616
- READ1 (idxpqfs->M2);
617
- READVECTOR (idxpqfs->codes);
671
+ } else if (h == fourcc("INSf")) {
672
+ IndexNSG* idxnsg = new IndexNSGFlat();
673
+ read_index_header(idxnsg, f);
674
+ READ1(idxnsg->GK);
675
+ READ1(idxnsg->build_type);
676
+ READ1(idxnsg->nndescent_S);
677
+ READ1(idxnsg->nndescent_R);
678
+ READ1(idxnsg->nndescent_L);
679
+ READ1(idxnsg->nndescent_iter);
680
+ read_NSG(&idxnsg->nsg, f);
681
+ idxnsg->storage = read_index(f, io_flags);
682
+ idxnsg->own_fields = true;
683
+ idx = idxnsg;
684
+ } else if (h == fourcc("IPfs")) {
685
+ IndexPQFastScan* idxpqfs = new IndexPQFastScan();
686
+ read_index_header(idxpqfs, f);
687
+ read_ProductQuantizer(&idxpqfs->pq, f);
688
+ READ1(idxpqfs->implem);
689
+ READ1(idxpqfs->bbs);
690
+ READ1(idxpqfs->qbs);
691
+ READ1(idxpqfs->ntotal2);
692
+ READ1(idxpqfs->M2);
693
+ READVECTOR(idxpqfs->codes);
618
694
  idx = idxpqfs;
619
695
 
620
696
  } else if (h == fourcc("IwPf")) {
621
- IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
622
- read_ivf_header (ivpq, f);
623
- READ1 (ivpq->by_residual);
624
- READ1 (ivpq->code_size);
625
- READ1 (ivpq->bbs);
626
- READ1 (ivpq->M2);
627
- READ1 (ivpq->implem);
628
- READ1 (ivpq->qbs2);
629
- read_ProductQuantizer (&ivpq->pq, f);
630
- read_InvertedLists (ivpq, f, io_flags);
697
+ IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan();
698
+ read_ivf_header(ivpq, f);
699
+ READ1(ivpq->by_residual);
700
+ READ1(ivpq->code_size);
701
+ READ1(ivpq->bbs);
702
+ READ1(ivpq->M2);
703
+ READ1(ivpq->implem);
704
+ READ1(ivpq->qbs2);
705
+ read_ProductQuantizer(&ivpq->pq, f);
706
+ read_InvertedLists(ivpq, f, io_flags);
631
707
  ivpq->precompute_table();
632
708
  idx = ivpq;
633
709
  } else {
634
710
  FAISS_THROW_FMT(
635
- "Index type 0x%08x (\"%s\") not recognized",
636
- h, fourcc_inv_printable(h).c_str()
637
- );
711
+ "Index type 0x%08x (\"%s\") not recognized",
712
+ h,
713
+ fourcc_inv_printable(h).c_str());
638
714
  idx = nullptr;
639
715
  }
640
716
  return idx;
641
717
  }
642
718
 
643
-
644
- Index *read_index (FILE * f, int io_flags) {
719
+ Index* read_index(FILE* f, int io_flags) {
645
720
  FileIOReader reader(f);
646
721
  return read_index(&reader, io_flags);
647
722
  }
648
723
 
649
- Index *read_index (const char *fname, int io_flags) {
724
+ Index* read_index(const char* fname, int io_flags) {
650
725
  FileIOReader reader(fname);
651
- Index *idx = read_index (&reader, io_flags);
726
+ Index* idx = read_index(&reader, io_flags);
652
727
  return idx;
653
728
  }
654
729
 
655
- VectorTransform *read_VectorTransform (const char *fname) {
730
+ VectorTransform* read_VectorTransform(const char* fname) {
656
731
  FileIOReader reader(fname);
657
- VectorTransform *vt = read_VectorTransform (&reader);
732
+ VectorTransform* vt = read_VectorTransform(&reader);
658
733
  return vt;
659
734
  }
660
735
 
661
-
662
-
663
736
  /*************************************************************
664
737
  * Read binary indexes
665
738
  **************************************************************/
666
739
 
667
- static void read_InvertedLists (
668
- IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
669
- InvertedLists *ils = read_InvertedLists (f, io_flags);
670
- FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
671
- ils->code_size == ivf->code_size));
740
+ static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) {
741
+ InvertedLists* ils = read_InvertedLists(f, io_flags);
742
+ FAISS_THROW_IF_NOT(
743
+ !ils ||
744
+ (ils->nlist == ivf->nlist && ils->code_size == ivf->code_size));
672
745
  ivf->invlists = ils;
673
746
  ivf->own_invlists = true;
674
747
  }
675
748
 
676
-
677
-
678
- static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
679
- READ1 (idx->d);
680
- READ1 (idx->code_size);
681
- READ1 (idx->ntotal);
682
- READ1 (idx->is_trained);
683
- READ1 (idx->metric_type);
749
+ static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
750
+ READ1(idx->d);
751
+ READ1(idx->code_size);
752
+ READ1(idx->ntotal);
753
+ READ1(idx->is_trained);
754
+ READ1(idx->metric_type);
684
755
  idx->verbose = false;
685
756
  }
686
757
 
687
- static void read_binary_ivf_header (
688
- IndexBinaryIVF *ivf, IOReader *f,
689
- std::vector<std::vector<Index::idx_t> > *ids = nullptr)
690
- {
691
- read_index_binary_header (ivf, f);
692
- READ1 (ivf->nlist);
693
- READ1 (ivf->nprobe);
694
- ivf->quantizer = read_index_binary (f);
758
+ static void read_binary_ivf_header(
759
+ IndexBinaryIVF* ivf,
760
+ IOReader* f,
761
+ std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
762
+ read_index_binary_header(ivf, f);
763
+ READ1(ivf->nlist);
764
+ READ1(ivf->nprobe);
765
+ ivf->quantizer = read_index_binary(f);
695
766
  ivf->own_fields = true;
696
767
  if (ids) { // used in legacy "Iv" formats
697
- ids->resize (ivf->nlist);
768
+ ids->resize(ivf->nlist);
698
769
  for (size_t i = 0; i < ivf->nlist; i++)
699
- READVECTOR ((*ids)[i]);
770
+ READVECTOR((*ids)[i]);
700
771
  }
701
- read_direct_map (&ivf->direct_map, f);
772
+ read_direct_map(&ivf->direct_map, f);
702
773
  }
703
774
 
704
- static void read_binary_hash_invlists (
705
- IndexBinaryHash::InvertedListMap &invlists,
706
- int b, IOReader *f)
707
- {
775
+ static void read_binary_hash_invlists(
776
+ IndexBinaryHash::InvertedListMap& invlists,
777
+ int b,
778
+ IOReader* f) {
708
779
  size_t sz;
709
- READ1 (sz);
780
+ READ1(sz);
710
781
  int il_nbit = 0;
711
- READ1 (il_nbit);
782
+ READ1(il_nbit);
712
783
  // buffer for bitstrings
713
784
  std::vector<uint8_t> buf((b + il_nbit) * sz);
714
- READVECTOR (buf);
715
- BitstringReader rd (buf.data(), buf.size());
716
- invlists.reserve (sz);
785
+ READVECTOR(buf);
786
+ BitstringReader rd(buf.data(), buf.size());
787
+ invlists.reserve(sz);
717
788
  for (size_t i = 0; i < sz; i++) {
718
789
  uint64_t hash = rd.read(b);
719
790
  uint64_t ilsz = rd.read(il_nbit);
720
- auto & il = invlists[hash];
721
- READVECTOR (il.ids);
722
- FAISS_THROW_IF_NOT (il.ids.size() == ilsz);
723
- READVECTOR (il.vecs);
791
+ auto& il = invlists[hash];
792
+ READVECTOR(il.ids);
793
+ FAISS_THROW_IF_NOT(il.ids.size() == ilsz);
794
+ READVECTOR(il.vecs);
724
795
  }
725
796
  }
726
797
 
727
798
  static void read_binary_multi_hash_map(
728
- IndexBinaryMultiHash::Map &map,
729
- int b, size_t ntotal,
730
- IOReader *f)
731
- {
799
+ IndexBinaryMultiHash::Map& map,
800
+ int b,
801
+ size_t ntotal,
802
+ IOReader* f) {
732
803
  int id_bits;
733
804
  size_t sz;
734
- READ1 (id_bits);
735
- READ1 (sz);
805
+ READ1(id_bits);
806
+ READ1(sz);
736
807
  std::vector<uint8_t> buf;
737
- READVECTOR (buf);
808
+ READVECTOR(buf);
738
809
  size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
739
- FAISS_THROW_IF_NOT (buf.size() == (nbit + 7) / 8);
740
- BitstringReader rd (buf.data(), buf.size());
741
- map.reserve (sz);
810
+ FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8);
811
+ BitstringReader rd(buf.data(), buf.size());
812
+ map.reserve(sz);
742
813
  for (size_t i = 0; i < sz; i++) {
743
814
  uint64_t hash = rd.read(b);
744
815
  uint64_t ilsz = rd.read(id_bits);
745
- auto & il = map[hash];
816
+ auto& il = map[hash];
746
817
  for (size_t j = 0; j < ilsz; j++) {
747
- il.push_back (rd.read (id_bits));
818
+ il.push_back(rd.read(id_bits));
748
819
  }
749
820
  }
750
821
  }
751
822
 
752
-
753
-
754
- IndexBinary *read_index_binary (IOReader *f, int io_flags) {
755
- IndexBinary * idx = nullptr;
823
+ IndexBinary* read_index_binary(IOReader* f, int io_flags) {
824
+ IndexBinary* idx = nullptr;
756
825
  uint32_t h;
757
- READ1 (h);
758
- if (h == fourcc ("IBxF")) {
759
- IndexBinaryFlat *idxf = new IndexBinaryFlat ();
760
- read_index_binary_header (idxf, f);
761
- READVECTOR (idxf->xb);
762
- FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
826
+ READ1(h);
827
+ if (h == fourcc("IBxF")) {
828
+ IndexBinaryFlat* idxf = new IndexBinaryFlat();
829
+ read_index_binary_header(idxf, f);
830
+ READVECTOR(idxf->xb);
831
+ FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size);
763
832
  // leak!
764
833
  idx = idxf;
765
- } else if (h == fourcc ("IBwF")) {
766
- IndexBinaryIVF *ivf = new IndexBinaryIVF ();
767
- read_binary_ivf_header (ivf, f);
768
- read_InvertedLists (ivf, f, io_flags);
834
+ } else if (h == fourcc("IBwF")) {
835
+ IndexBinaryIVF* ivf = new IndexBinaryIVF();
836
+ read_binary_ivf_header(ivf, f);
837
+ read_InvertedLists(ivf, f, io_flags);
769
838
  idx = ivf;
770
- } else if (h == fourcc ("IBFf")) {
771
- IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat ();
772
- read_index_binary_header (idxff, f);
839
+ } else if (h == fourcc("IBFf")) {
840
+ IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat();
841
+ read_index_binary_header(idxff, f);
773
842
  idxff->own_fields = true;
774
- idxff->index = read_index (f, io_flags);
843
+ idxff->index = read_index(f, io_flags);
775
844
  idx = idxff;
776
- } else if (h == fourcc ("IBHf")) {
777
- IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW ();
778
- read_index_binary_header (idxhnsw, f);
779
- read_HNSW (&idxhnsw->hnsw, f);
780
- idxhnsw->storage = read_index_binary (f, io_flags);
845
+ } else if (h == fourcc("IBHf")) {
846
+ IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW();
847
+ read_index_binary_header(idxhnsw, f);
848
+ read_HNSW(&idxhnsw->hnsw, f);
849
+ idxhnsw->storage = read_index_binary(f, io_flags);
781
850
  idxhnsw->own_fields = true;
782
851
  idx = idxhnsw;
783
- } else if(h == fourcc ("IBMp") || h == fourcc ("IBM2")) {
784
- bool is_map2 = h == fourcc ("IBM2");
785
- IndexBinaryIDMap * idxmap = is_map2 ?
786
- new IndexBinaryIDMap2 () : new IndexBinaryIDMap ();
787
- read_index_binary_header (idxmap, f);
788
- idxmap->index = read_index_binary (f, io_flags);
852
+ } else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
853
+ bool is_map2 = h == fourcc("IBM2");
854
+ IndexBinaryIDMap* idxmap =
855
+ is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap();
856
+ read_index_binary_header(idxmap, f);
857
+ idxmap->index = read_index_binary(f, io_flags);
789
858
  idxmap->own_fields = true;
790
- READVECTOR (idxmap->id_map);
859
+ READVECTOR(idxmap->id_map);
791
860
  if (is_map2) {
792
- static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map ();
861
+ static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map();
793
862
  }
794
863
  idx = idxmap;
795
- } else if(h == fourcc("IBHh")) {
796
- IndexBinaryHash *idxh = new IndexBinaryHash ();
797
- read_index_binary_header (idxh, f);
798
- READ1 (idxh->b);
799
- READ1 (idxh->nflip);
864
+ } else if (h == fourcc("IBHh")) {
865
+ IndexBinaryHash* idxh = new IndexBinaryHash();
866
+ read_index_binary_header(idxh, f);
867
+ READ1(idxh->b);
868
+ READ1(idxh->nflip);
800
869
  read_binary_hash_invlists(idxh->invlists, idxh->b, f);
801
870
  idx = idxh;
802
- } else if(h == fourcc("IBHm")) {
803
- IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash ();
804
- read_index_binary_header (idxmh, f);
805
- idxmh->storage = dynamic_cast<IndexBinaryFlat*> (read_index_binary (f));
806
- FAISS_THROW_IF_NOT(idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
871
+ } else if (h == fourcc("IBHm")) {
872
+ IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash();
873
+ read_index_binary_header(idxmh, f);
874
+ idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f));
875
+ FAISS_THROW_IF_NOT(
876
+ idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
807
877
  idxmh->own_fields = true;
808
- READ1 (idxmh->b);
809
- READ1 (idxmh->nhash);
810
- READ1 (idxmh->nflip);
811
- idxmh->maps.resize (idxmh->nhash);
878
+ READ1(idxmh->b);
879
+ READ1(idxmh->nhash);
880
+ READ1(idxmh->nflip);
881
+ idxmh->maps.resize(idxmh->nhash);
812
882
  for (int i = 0; i < idxmh->nhash; i++) {
813
883
  read_binary_multi_hash_map(
814
884
  idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
@@ -816,25 +886,23 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
816
886
  idx = idxmh;
817
887
  } else {
818
888
  FAISS_THROW_FMT(
819
- "Index type %08x (\"%s\") not recognized",
820
- h, fourcc_inv_printable(h).c_str()
821
- );
889
+ "Index type %08x (\"%s\") not recognized",
890
+ h,
891
+ fourcc_inv_printable(h).c_str());
822
892
  idx = nullptr;
823
893
  }
824
894
  return idx;
825
895
  }
826
896
 
827
- IndexBinary *read_index_binary (FILE * f, int io_flags) {
897
+ IndexBinary* read_index_binary(FILE* f, int io_flags) {
828
898
  FileIOReader reader(f);
829
899
  return read_index_binary(&reader, io_flags);
830
900
  }
831
901
 
832
- IndexBinary *read_index_binary (const char *fname, int io_flags) {
902
+ IndexBinary* read_index_binary(const char* fname, int io_flags) {
833
903
  FileIOReader reader(fname);
834
- IndexBinary *idx = read_index_binary (&reader, io_flags);
904
+ IndexBinary* idx = read_index_binary(&reader, io_flags);
835
905
  return idx;
836
906
  }
837
907
 
838
-
839
-
840
908
  } // namespace faiss