faiss 0.1.4 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (219) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/README.md +15 -3
  4. data/ext/faiss/ext.cpp +12 -308
  5. data/ext/faiss/extconf.rb +5 -2
  6. data/ext/faiss/index.cpp +189 -0
  7. data/ext/faiss/index_binary.cpp +75 -0
  8. data/ext/faiss/kmeans.cpp +40 -0
  9. data/ext/faiss/numo.hpp +867 -0
  10. data/ext/faiss/pca_matrix.cpp +33 -0
  11. data/ext/faiss/product_quantizer.cpp +53 -0
  12. data/ext/faiss/utils.cpp +13 -0
  13. data/ext/faiss/utils.h +5 -0
  14. data/lib/faiss.rb +0 -5
  15. data/lib/faiss/version.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  17. data/vendor/faiss/faiss/AutoTune.h +55 -56
  18. data/vendor/faiss/faiss/Clustering.cpp +334 -195
  19. data/vendor/faiss/faiss/Clustering.h +88 -35
  20. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  21. data/vendor/faiss/faiss/IVFlib.h +48 -51
  22. data/vendor/faiss/faiss/Index.cpp +85 -103
  23. data/vendor/faiss/faiss/Index.h +54 -48
  24. data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
  25. data/vendor/faiss/faiss/Index2Layer.h +22 -22
  26. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  27. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  28. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  29. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  30. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  31. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  32. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  33. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  34. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  35. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  36. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  37. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  38. data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
  39. data/vendor/faiss/faiss/IndexFlat.h +35 -46
  40. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  41. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  42. data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
  43. data/vendor/faiss/faiss/IndexIVF.h +146 -113
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
  54. data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
  55. data/vendor/faiss/faiss/IndexLSH.h +21 -26
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -67
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
  69. data/vendor/faiss/faiss/IndexRefine.h +22 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
  73. data/vendor/faiss/faiss/IndexResidual.h +152 -0
  74. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
  75. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
  76. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  77. data/vendor/faiss/faiss/IndexShards.h +85 -73
  78. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  79. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  80. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  81. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  82. data/vendor/faiss/faiss/MetricType.h +7 -7
  83. data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
  84. data/vendor/faiss/faiss/VectorTransform.h +61 -89
  85. data/vendor/faiss/faiss/clone_index.cpp +77 -73
  86. data/vendor/faiss/faiss/clone_index.h +4 -9
  87. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  88. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  89. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
  90. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  91. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  92. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  93. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  94. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  95. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  96. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  97. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  101. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  102. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  103. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  104. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  106. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  108. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  110. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  112. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  113. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  114. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  115. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  116. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  121. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  122. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  124. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  125. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  126. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  128. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  129. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  130. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  131. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
  133. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  135. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  136. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  137. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  138. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  139. data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
  140. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
  142. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  144. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  145. data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
  146. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  148. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  149. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  151. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
  153. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
  154. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
  156. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  157. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  158. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  159. data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
  160. data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
  161. data/vendor/faiss/faiss/impl/io.cpp +75 -94
  162. data/vendor/faiss/faiss/impl/io.h +31 -41
  163. data/vendor/faiss/faiss/impl/io_macros.h +40 -29
  164. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  165. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  166. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  167. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  171. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  172. data/vendor/faiss/faiss/index_factory.cpp +269 -218
  173. data/vendor/faiss/faiss/index_factory.h +6 -7
  174. data/vendor/faiss/faiss/index_io.h +23 -26
  175. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  177. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  178. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  179. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  180. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  181. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  183. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  185. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  186. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  187. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  188. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  189. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  190. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  191. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  192. data/vendor/faiss/faiss/utils/distances.cpp +301 -310
  193. data/vendor/faiss/faiss/utils/distances.h +133 -118
  194. data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
  195. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  196. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  197. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  198. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  199. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  200. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  201. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  202. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  203. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  204. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  205. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  206. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  207. data/vendor/faiss/faiss/utils/random.h +13 -16
  208. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  209. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  210. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  211. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  212. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  213. data/vendor/faiss/faiss/utils/utils.h +53 -48
  214. metadata +31 -10
  215. data/lib/faiss/index.rb +0 -20
  216. data/lib/faiss/index_binary.rb +0 -20
  217. data/lib/faiss/kmeans.rb +0 -15
  218. data/lib/faiss/pca_matrix.rb +0 -15
  219. data/lib/faiss/product_quantizer.rb +0 -22
@@ -7,9 +7,9 @@
7
7
 
8
8
  #include <faiss/invlists/InvertedListsIOHook.h>
9
9
 
10
+ #include <faiss/impl/FaissAssert.h>
10
11
  #include <faiss/impl/io.h>
11
12
  #include <faiss/impl/io_macros.h>
12
- #include <faiss/impl/FaissAssert.h>
13
13
 
14
14
  #include <faiss/invlists/BlockInvertedLists.h>
15
15
 
@@ -17,24 +17,21 @@
17
17
  #include <faiss/invlists/OnDiskInvertedLists.h>
18
18
  #endif // !_MSC_VER
19
19
 
20
-
21
20
  namespace faiss {
22
21
 
23
-
24
22
  /**********************************************************
25
23
  * InvertedListIOHook's
26
24
  **********************************************************/
27
25
 
28
26
  InvertedListsIOHook::InvertedListsIOHook(
29
- const std::string & key, const std::string & classname):
30
- key(key), classname(classname)
31
- {}
27
+ const std::string& key,
28
+ const std::string& classname)
29
+ : key(key), classname(classname) {}
32
30
 
33
31
  namespace {
34
32
 
35
33
  /// std::vector that deletes its contents
36
- struct IOHookTable: std::vector<InvertedListsIOHook*> {
37
-
34
+ struct IOHookTable : std::vector<InvertedListsIOHook*> {
38
35
  IOHookTable() {
39
36
  #ifndef _MSC_VER
40
37
  push_back(new OnDiskInvertedListsIOHook());
@@ -43,7 +40,7 @@ struct IOHookTable: std::vector<InvertedListsIOHook*> {
43
40
  }
44
41
 
45
42
  ~IOHookTable() {
46
- for (auto x: *this) {
43
+ for (auto x : *this) {
47
44
  delete x;
48
45
  }
49
46
  }
@@ -51,44 +48,41 @@ struct IOHookTable: std::vector<InvertedListsIOHook*> {
51
48
 
52
49
  static IOHookTable InvertedListsIOHook_table;
53
50
 
54
- } // anonymous namepsace
51
+ } // namespace
55
52
 
56
- InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
57
- {
58
- for(const auto & callback: InvertedListsIOHook_table) {
53
+ InvertedListsIOHook* InvertedListsIOHook::lookup(int h) {
54
+ for (const auto& callback : InvertedListsIOHook_table) {
59
55
  if (h == fourcc(callback->key)) {
60
56
  return callback;
61
57
  }
62
58
  }
63
- FAISS_THROW_FMT (
64
- "read_InvertedLists: could not load ArrayInvertedLists as "
65
- "%08x (\"%s\")", h, fourcc_inv_printable(h).c_str()
66
- );
59
+ FAISS_THROW_FMT(
60
+ "read_InvertedLists: could not load ArrayInvertedLists as "
61
+ "%08x (\"%s\")",
62
+ h,
63
+ fourcc_inv_printable(h).c_str());
67
64
  }
68
65
 
69
- InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
70
- {
71
- for(const auto & callback: InvertedListsIOHook_table) {
66
+ InvertedListsIOHook* InvertedListsIOHook::lookup_classname(
67
+ const std::string& classname) {
68
+ for (const auto& callback : InvertedListsIOHook_table) {
72
69
  if (callback->classname == classname) {
73
70
  return callback;
74
71
  }
75
72
  }
76
- FAISS_THROW_FMT (
73
+ FAISS_THROW_FMT(
77
74
  "read_InvertedLists: could not find classname %s",
78
- classname.c_str()
79
- );
75
+ classname.c_str());
80
76
  }
81
77
 
82
- void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
83
- {
78
+ void InvertedListsIOHook::add_callback(InvertedListsIOHook* cb) {
84
79
  InvertedListsIOHook_table.push_back(cb);
85
80
  }
86
81
 
87
- void InvertedListsIOHook::print_callbacks()
88
- {
82
+ void InvertedListsIOHook::print_callbacks() {
89
83
  printf("registered %zd InvertedListsIOHooks:\n",
90
- InvertedListsIOHook_table.size());
91
- for(const auto & cb: InvertedListsIOHook_table) {
84
+ InvertedListsIOHook_table.size());
85
+ for (const auto& cb : InvertedListsIOHook_table) {
92
86
  printf("%08x %s %s\n",
93
87
  fourcc(cb->key.c_str()),
94
88
  cb->key.c_str(),
@@ -96,11 +90,12 @@ void InvertedListsIOHook::print_callbacks()
96
90
  }
97
91
  }
98
92
 
99
- InvertedLists * InvertedListsIOHook::read_ArrayInvertedLists(
100
- IOReader *, int ,
101
- size_t , size_t ,
102
- const std::vector<size_t> &) const
103
- {
93
+ InvertedLists* InvertedListsIOHook::read_ArrayInvertedLists(
94
+ IOReader*,
95
+ int,
96
+ size_t,
97
+ size_t,
98
+ const std::vector<size_t>&) const {
104
99
  FAISS_THROW_FMT("read to array not implemented for %s", classname.c_str());
105
100
  }
106
101
 
@@ -7,14 +7,12 @@
7
7
 
8
8
  #pragma once
9
9
 
10
- #include <string>
11
- #include <faiss/invlists/InvertedLists.h>
12
10
  #include <faiss/impl/io.h>
13
-
11
+ #include <faiss/invlists/InvertedLists.h>
12
+ #include <string>
14
13
 
15
14
  namespace faiss {
16
15
 
17
-
18
16
  /** Callbacks to handle other types of InvertedList objects.
19
17
  *
20
18
  * The callbacks should be registered with add_callback before calling
@@ -26,38 +24,39 @@ namespace faiss {
26
24
  * - the class name (as given by typeid.name) at write time
27
25
  */
28
26
  struct InvertedListsIOHook {
29
- const std::string key; ///< string version of the fourcc
27
+ const std::string key; ///< string version of the fourcc
30
28
  const std::string classname; ///< typeid.name
31
29
 
32
- InvertedListsIOHook(const std::string & key, const std::string & classname);
30
+ InvertedListsIOHook(const std::string& key, const std::string& classname);
33
31
 
34
32
  /// write the index to the IOWriter (including the fourcc)
35
- virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
33
+ virtual void write(const InvertedLists* ils, IOWriter* f) const = 0;
36
34
 
37
35
  /// called when the fourcc matches this class's fourcc
38
- virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
36
+ virtual InvertedLists* read(IOReader* f, int io_flags) const = 0;
39
37
 
40
38
  /** read from a ArrayInvertedLists into this invertedlist type.
41
- * For this to work, the callback has to be enabled and the io_flag has to be set to
42
- * IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
39
+ * For this to work, the callback has to be enabled and the io_flag has to
40
+ * be set to IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
43
41
  *
44
42
  * (default implementation fails)
45
43
  */
46
- virtual InvertedLists * read_ArrayInvertedLists(
47
- IOReader *f, int io_flags,
48
- size_t nlist, size_t code_size,
49
- const std::vector<size_t> &sizes) const;
44
+ virtual InvertedLists* read_ArrayInvertedLists(
45
+ IOReader* f,
46
+ int io_flags,
47
+ size_t nlist,
48
+ size_t code_size,
49
+ const std::vector<size_t>& sizes) const;
50
50
 
51
51
  virtual ~InvertedListsIOHook() {}
52
52
 
53
53
  /**************************** Manage the set of callbacks ******/
54
54
 
55
55
  // transfers ownership
56
- static void add_callback(InvertedListsIOHook *);
56
+ static void add_callback(InvertedListsIOHook*);
57
57
  static void print_callbacks();
58
58
  static InvertedListsIOHook* lookup(int h);
59
- static InvertedListsIOHook* lookup_classname(const std::string & classname);
60
-
59
+ static InvertedListsIOHook* lookup_classname(const std::string& classname);
61
60
  };
62
61
 
63
- } // namespace faiss
62
+ } // namespace faiss
@@ -14,9 +14,9 @@
14
14
  #include <unordered_set>
15
15
 
16
16
  #include <sys/mman.h>
17
- #include <unistd.h>
18
- #include <sys/types.h>
19
17
  #include <sys/stat.h>
18
+ #include <sys/types.h>
19
+ #include <unistd.h>
20
20
 
21
21
  #include <faiss/impl/FaissAssert.h>
22
22
  #include <faiss/utils/utils.h>
@@ -24,15 +24,12 @@
24
24
  #include <faiss/impl/io.h>
25
25
  #include <faiss/impl/io_macros.h>
26
26
 
27
-
28
27
  namespace faiss {
29
28
 
30
-
31
29
  /**********************************************
32
30
  * LockLevels
33
31
  **********************************************/
34
32
 
35
-
36
33
  struct LockLevels {
37
34
  /* There n times lock1(n), one lock2 and one lock3
38
35
  * Invariants:
@@ -47,8 +44,8 @@ struct LockLevels {
47
44
  pthread_cond_t level3_cv;
48
45
 
49
46
  std::unordered_set<int> level1_holders; // which level1 locks are held
50
- int n_level2; // nb threads that wait on level2
51
- bool level3_in_use; // a threads waits on level3
47
+ int n_level2; // nb threads that wait on level2
48
+ bool level3_in_use; // a threads waits on level3
52
49
  bool level2_in_use;
53
50
 
54
51
  LockLevels() {
@@ -91,7 +88,7 @@ struct LockLevels {
91
88
 
92
89
  void lock_2() {
93
90
  pthread_mutex_lock(&mutex1);
94
- n_level2 ++;
91
+ n_level2++;
95
92
  if (level3_in_use) { // tell waiting level3 that we are blocked
96
93
  pthread_cond_signal(&level3_cv);
97
94
  }
@@ -105,7 +102,7 @@ struct LockLevels {
105
102
  void unlock_2() {
106
103
  pthread_mutex_lock(&mutex1);
107
104
  level2_in_use = false;
108
- n_level2 --;
105
+ n_level2--;
109
106
  pthread_cond_signal(&level2_cv);
110
107
  pthread_mutex_unlock(&mutex1);
111
108
  }
@@ -128,17 +125,17 @@ struct LockLevels {
128
125
  pthread_mutex_unlock(&mutex1);
129
126
  }
130
127
 
131
- void print () {
128
+ void print() {
132
129
  pthread_mutex_lock(&mutex1);
133
130
  printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
134
- int(level3_in_use), n_level2);
131
+ int(level3_in_use),
132
+ n_level2);
135
133
  for (int k : level1_holders) {
136
134
  printf("%d ", k);
137
135
  }
138
136
  printf("]\n");
139
137
  pthread_mutex_unlock(&mutex1);
140
138
  }
141
-
142
139
  };
143
140
 
144
141
  /**********************************************
@@ -146,27 +143,27 @@ struct LockLevels {
146
143
  **********************************************/
147
144
 
148
145
  struct OnDiskInvertedLists::OngoingPrefetch {
149
-
150
146
  struct Thread {
151
147
  pthread_t pth;
152
- OngoingPrefetch *pf;
148
+ OngoingPrefetch* pf;
153
149
 
154
- bool one_list () {
150
+ bool one_list() {
155
151
  idx_t list_no = pf->get_next_list();
156
- if(list_no == -1) return false;
157
- const OnDiskInvertedLists *od = pf->od;
158
- od->locks->lock_1 (list_no);
159
- size_t n = od->list_size (list_no);
160
- const Index::idx_t *idx = od->get_ids (list_no);
161
- const uint8_t *codes = od->get_codes (list_no);
152
+ if (list_no == -1)
153
+ return false;
154
+ const OnDiskInvertedLists* od = pf->od;
155
+ od->locks->lock_1(list_no);
156
+ size_t n = od->list_size(list_no);
157
+ const Index::idx_t* idx = od->get_ids(list_no);
158
+ const uint8_t* codes = od->get_codes(list_no);
162
159
  int cs = 0;
163
- for (size_t i = 0; i < n;i++) {
160
+ for (size_t i = 0; i < n; i++) {
164
161
  cs += idx[i];
165
162
  }
166
- const idx_t *codes8 = (const idx_t*)codes;
163
+ const idx_t* codes8 = (const idx_t*)codes;
167
164
  idx_t n8 = n * od->code_size / 8;
168
165
 
169
- for (size_t i = 0; i < n8;i++) {
166
+ for (size_t i = 0; i < n8; i++) {
170
167
  cs += codes8[i];
171
168
  }
172
169
  od->locks->unlock_1(list_no);
@@ -174,7 +171,6 @@ struct OnDiskInvertedLists::OngoingPrefetch {
174
171
  global_cs += cs & 1;
175
172
  return true;
176
173
  }
177
-
178
174
  };
179
175
 
180
176
  std::vector<Thread> threads;
@@ -189,125 +185,123 @@ struct OnDiskInvertedLists::OngoingPrefetch {
189
185
  // pretext to avoid code below to be optimized out
190
186
  static int global_cs;
191
187
 
192
- const OnDiskInvertedLists *od;
188
+ const OnDiskInvertedLists* od;
193
189
 
194
- explicit OngoingPrefetch (const OnDiskInvertedLists *od): od (od)
195
- {
196
- pthread_mutex_init (&mutex, nullptr);
197
- pthread_mutex_init (&list_ids_mutex, nullptr);
190
+ explicit OngoingPrefetch(const OnDiskInvertedLists* od) : od(od) {
191
+ pthread_mutex_init(&mutex, nullptr);
192
+ pthread_mutex_init(&list_ids_mutex, nullptr);
198
193
  cur_list = 0;
199
194
  }
200
195
 
201
- static void* prefetch_list (void * arg) {
202
- Thread *th = static_cast<Thread*>(arg);
196
+ static void* prefetch_list(void* arg) {
197
+ Thread* th = static_cast<Thread*>(arg);
203
198
 
204
- while (th->one_list()) ;
199
+ while (th->one_list())
200
+ ;
205
201
 
206
202
  return nullptr;
207
203
  }
208
204
 
209
- idx_t get_next_list () {
205
+ idx_t get_next_list() {
210
206
  idx_t list_no = -1;
211
- pthread_mutex_lock (&list_ids_mutex);
207
+ pthread_mutex_lock(&list_ids_mutex);
212
208
  if (cur_list >= 0 && cur_list < list_ids.size()) {
213
209
  list_no = list_ids[cur_list++];
214
210
  }
215
- pthread_mutex_unlock (&list_ids_mutex);
211
+ pthread_mutex_unlock(&list_ids_mutex);
216
212
  return list_no;
217
213
  }
218
214
 
219
- void prefetch_lists (const idx_t *list_nos, int n) {
220
- pthread_mutex_lock (&mutex);
221
- pthread_mutex_lock (&list_ids_mutex);
222
- list_ids.clear ();
223
- pthread_mutex_unlock (&list_ids_mutex);
224
- for (auto &th: threads) {
225
- pthread_join (th.pth, nullptr);
215
+ void prefetch_lists(const idx_t* list_nos, int n) {
216
+ pthread_mutex_lock(&mutex);
217
+ pthread_mutex_lock(&list_ids_mutex);
218
+ list_ids.clear();
219
+ pthread_mutex_unlock(&list_ids_mutex);
220
+ for (auto& th : threads) {
221
+ pthread_join(th.pth, nullptr);
226
222
  }
227
223
 
228
- threads.resize (0);
224
+ threads.resize(0);
229
225
  cur_list = 0;
230
- int nt = std::min (n, od->prefetch_nthread);
226
+ int nt = std::min(n, od->prefetch_nthread);
231
227
 
232
228
  if (nt > 0) {
233
229
  // prepare tasks
234
230
  for (int i = 0; i < n; i++) {
235
231
  idx_t list_no = list_nos[i];
236
232
  if (list_no >= 0 && od->list_size(list_no) > 0) {
237
- list_ids.push_back (list_no);
233
+ list_ids.push_back(list_no);
238
234
  }
239
235
  }
240
236
  // prepare threads
241
- threads.resize (nt);
242
- for (Thread &th: threads) {
237
+ threads.resize(nt);
238
+ for (Thread& th : threads) {
243
239
  th.pf = this;
244
- pthread_create (&th.pth, nullptr, prefetch_list, &th);
240
+ pthread_create(&th.pth, nullptr, prefetch_list, &th);
245
241
  }
246
242
  }
247
- pthread_mutex_unlock (&mutex);
243
+ pthread_mutex_unlock(&mutex);
248
244
  }
249
245
 
250
- ~OngoingPrefetch () {
251
- pthread_mutex_lock (&mutex);
252
- for (auto &th: threads) {
253
- pthread_join (th.pth, nullptr);
246
+ ~OngoingPrefetch() {
247
+ pthread_mutex_lock(&mutex);
248
+ for (auto& th : threads) {
249
+ pthread_join(th.pth, nullptr);
254
250
  }
255
- pthread_mutex_unlock (&mutex);
256
- pthread_mutex_destroy (&mutex);
257
- pthread_mutex_destroy (&list_ids_mutex);
251
+ pthread_mutex_unlock(&mutex);
252
+ pthread_mutex_destroy(&mutex);
253
+ pthread_mutex_destroy(&list_ids_mutex);
258
254
  }
259
-
260
255
  };
261
256
 
262
257
  int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
263
258
 
264
-
265
- void OnDiskInvertedLists::prefetch_lists (const idx_t *list_nos, int n) const
266
- {
267
- pf->prefetch_lists (list_nos, n);
259
+ void OnDiskInvertedLists::prefetch_lists(const idx_t* list_nos, int n) const {
260
+ pf->prefetch_lists(list_nos, n);
268
261
  }
269
262
 
270
-
271
-
272
263
  /**********************************************
273
264
  * OnDiskInvertedLists: mmapping
274
265
  **********************************************/
275
266
 
276
-
277
- void OnDiskInvertedLists::do_mmap ()
278
- {
279
- const char *rw_flags = read_only ? "r" : "r+";
267
+ void OnDiskInvertedLists::do_mmap() {
268
+ const char* rw_flags = read_only ? "r" : "r+";
280
269
  int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
281
- FILE *f = fopen (filename.c_str(), rw_flags);
282
- FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode %s: %s",
283
- filename.c_str(), rw_flags, strerror(errno));
284
-
285
- uint8_t * ptro = (uint8_t*)mmap (nullptr, totsize,
286
- prot, MAP_SHARED, fileno (f), 0);
287
-
288
- FAISS_THROW_IF_NOT_FMT (ptro != MAP_FAILED,
289
- "could not mmap %s: %s",
290
- filename.c_str(),
291
- strerror(errno));
270
+ FILE* f = fopen(filename.c_str(), rw_flags);
271
+ FAISS_THROW_IF_NOT_FMT(
272
+ f,
273
+ "could not open %s in mode %s: %s",
274
+ filename.c_str(),
275
+ rw_flags,
276
+ strerror(errno));
277
+
278
+ uint8_t* ptro =
279
+ (uint8_t*)mmap(nullptr, totsize, prot, MAP_SHARED, fileno(f), 0);
280
+
281
+ FAISS_THROW_IF_NOT_FMT(
282
+ ptro != MAP_FAILED,
283
+ "could not mmap %s: %s",
284
+ filename.c_str(),
285
+ strerror(errno));
292
286
  ptr = ptro;
293
- fclose (f);
294
-
287
+ fclose(f);
295
288
  }
296
289
 
297
- void OnDiskInvertedLists::update_totsize (size_t new_size)
298
- {
299
-
290
+ void OnDiskInvertedLists::update_totsize(size_t new_size) {
300
291
  // unmap file
301
292
  if (ptr != nullptr) {
302
- int err = munmap (ptr, totsize);
303
- FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s", strerror(errno));
293
+ int err = munmap(ptr, totsize);
294
+ FAISS_THROW_IF_NOT_FMT(err == 0, "munmap error: %s", strerror(errno));
304
295
  }
305
296
  if (totsize == 0) {
306
297
  // must create file before truncating it
307
- FILE *f = fopen (filename.c_str(), "w");
308
- FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode W: %s",
309
- filename.c_str(), strerror(errno));
310
- fclose (f);
298
+ FILE* f = fopen(filename.c_str(), "w");
299
+ FAISS_THROW_IF_NOT_FMT(
300
+ f,
301
+ "could not open %s in mode W: %s",
302
+ filename.c_str(),
303
+ strerror(errno));
304
+ fclose(f);
311
305
  }
312
306
 
313
307
  if (new_size > totsize) {
@@ -315,7 +309,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
315
309
  slots.back().offset + slots.back().capacity == totsize) {
316
310
  slots.back().capacity += new_size - totsize;
317
311
  } else {
318
- slots.push_back (Slot(totsize, new_size - totsize));
312
+ slots.push_back(Slot(totsize, new_size - totsize));
319
313
  }
320
314
  } else {
321
315
  assert(!"not implemented");
@@ -324,89 +318,69 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
324
318
  totsize = new_size;
325
319
 
326
320
  // create file
327
- printf ("resizing %s to %zd bytes\n", filename.c_str(), totsize);
321
+ printf("resizing %s to %zd bytes\n", filename.c_str(), totsize);
328
322
 
329
- int err = truncate (filename.c_str(), totsize);
323
+ int err = truncate(filename.c_str(), totsize);
330
324
 
331
- FAISS_THROW_IF_NOT_FMT (err == 0, "truncate %s to %ld: %s",
332
- filename.c_str(), totsize,
333
- strerror(errno));
334
- do_mmap ();
325
+ FAISS_THROW_IF_NOT_FMT(
326
+ err == 0,
327
+ "truncate %s to %ld: %s",
328
+ filename.c_str(),
329
+ totsize,
330
+ strerror(errno));
331
+ do_mmap();
335
332
  }
336
333
 
337
-
338
-
339
-
340
-
341
-
342
334
  /**********************************************
343
335
  * OnDiskInvertedLists
344
336
  **********************************************/
345
337
 
346
338
  #define INVALID_OFFSET (size_t)(-1)
347
339
 
348
- OnDiskOneList::OnDiskOneList ():
349
- size (0), capacity (0), offset (INVALID_OFFSET)
350
- {}
351
-
352
- OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
353
- offset (offset), capacity (capacity)
354
- {}
340
+ OnDiskOneList::OnDiskOneList() : size(0), capacity(0), offset(INVALID_OFFSET) {}
355
341
 
356
- OnDiskInvertedLists::Slot::Slot ():
357
- offset (0), capacity (0)
358
- {}
342
+ OnDiskInvertedLists::Slot::Slot(size_t offset, size_t capacity)
343
+ : offset(offset), capacity(capacity) {}
359
344
 
345
+ OnDiskInvertedLists::Slot::Slot() : offset(0), capacity(0) {}
360
346
 
361
-
362
- OnDiskInvertedLists::OnDiskInvertedLists (
363
- size_t nlist, size_t code_size,
364
- const char *filename):
365
- InvertedLists (nlist, code_size),
366
- filename (filename),
367
- totsize (0),
368
- ptr (nullptr),
369
- read_only (false),
370
- locks (new LockLevels ()),
371
- pf (new OngoingPrefetch (this)),
372
- prefetch_nthread (32)
373
- {
374
- lists.resize (nlist);
347
+ OnDiskInvertedLists::OnDiskInvertedLists(
348
+ size_t nlist,
349
+ size_t code_size,
350
+ const char* filename)
351
+ : InvertedLists(nlist, code_size),
352
+ filename(filename),
353
+ totsize(0),
354
+ ptr(nullptr),
355
+ read_only(false),
356
+ locks(new LockLevels()),
357
+ pf(new OngoingPrefetch(this)),
358
+ prefetch_nthread(32) {
359
+ lists.resize(nlist);
375
360
 
376
361
  // slots starts empty
377
362
  }
378
363
 
379
- OnDiskInvertedLists::OnDiskInvertedLists ():
380
- OnDiskInvertedLists (0, 0, "")
381
- {
382
- }
364
+ OnDiskInvertedLists::OnDiskInvertedLists() : OnDiskInvertedLists(0, 0, "") {}
383
365
 
384
- OnDiskInvertedLists::~OnDiskInvertedLists ()
385
- {
366
+ OnDiskInvertedLists::~OnDiskInvertedLists() {
386
367
  delete pf;
387
368
 
388
369
  // unmap all lists
389
370
  if (ptr != nullptr) {
390
- int err = munmap (ptr, totsize);
371
+ int err = munmap(ptr, totsize);
391
372
  if (err != 0) {
392
- fprintf(stderr, "mumap error: %s",
393
- strerror(errno));
373
+ fprintf(stderr, "mumap error: %s", strerror(errno));
394
374
  }
395
375
  }
396
376
  delete locks;
397
377
  }
398
378
 
399
-
400
-
401
-
402
- size_t OnDiskInvertedLists::list_size(size_t list_no) const
403
- {
379
+ size_t OnDiskInvertedLists::list_size(size_t list_no) const {
404
380
  return lists[list_no].size;
405
381
  }
406
382
 
407
-
408
- const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
409
- {
383
+ const uint8_t* OnDiskInvertedLists::get_codes(size_t list_no) const {
410
384
  if (lists[list_no].offset == INVALID_OFFSET) {
411
385
  return nullptr;
412
386
  }
@@ -414,68 +388,65 @@ const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
414
388
  return ptr + lists[list_no].offset;
415
389
  }
416
390
 
417
- const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
418
- {
391
+ const Index::idx_t* OnDiskInvertedLists::get_ids(size_t list_no) const {
419
392
  if (lists[list_no].offset == INVALID_OFFSET) {
420
393
  return nullptr;
421
394
  }
422
395
 
423
- return (const idx_t*)(ptr + lists[list_no].offset +
424
- code_size * lists[list_no].capacity);
396
+ return (
397
+ const idx_t*)(ptr + lists[list_no].offset + code_size * lists[list_no].capacity);
425
398
  }
426
399
 
427
-
428
- void OnDiskInvertedLists::update_entries (
429
- size_t list_no, size_t offset, size_t n_entry,
430
- const idx_t *ids_in, const uint8_t *codes_in)
431
- {
432
- FAISS_THROW_IF_NOT (!read_only);
433
- if (n_entry == 0) return;
434
- const List & l = lists[list_no];
435
- assert (n_entry + offset <= l.size);
436
- idx_t *ids = const_cast<idx_t*>(get_ids (list_no));
437
- memcpy (ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
438
- uint8_t *codes = const_cast<uint8_t*>(get_codes (list_no));
439
- memcpy (codes + offset * code_size, codes_in, code_size * n_entry);
400
+ void OnDiskInvertedLists::update_entries(
401
+ size_t list_no,
402
+ size_t offset,
403
+ size_t n_entry,
404
+ const idx_t* ids_in,
405
+ const uint8_t* codes_in) {
406
+ FAISS_THROW_IF_NOT(!read_only);
407
+ if (n_entry == 0)
408
+ return;
409
+ const List& l = lists[list_no];
410
+ assert(n_entry + offset <= l.size);
411
+ idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
412
+ memcpy(ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
413
+ uint8_t* codes = const_cast<uint8_t*>(get_codes(list_no));
414
+ memcpy(codes + offset * code_size, codes_in, code_size * n_entry);
440
415
  }
441
416
 
442
- size_t OnDiskInvertedLists::add_entries (
443
- size_t list_no, size_t n_entry,
444
- const idx_t* ids, const uint8_t *code)
445
- {
446
- FAISS_THROW_IF_NOT (!read_only);
447
- locks->lock_1 (list_no);
448
- size_t o = list_size (list_no);
449
- resize_locked (list_no, n_entry + o);
450
- update_entries (list_no, o, n_entry, ids, code);
451
- locks->unlock_1 (list_no);
417
+ size_t OnDiskInvertedLists::add_entries(
418
+ size_t list_no,
419
+ size_t n_entry,
420
+ const idx_t* ids,
421
+ const uint8_t* code) {
422
+ FAISS_THROW_IF_NOT(!read_only);
423
+ locks->lock_1(list_no);
424
+ size_t o = list_size(list_no);
425
+ resize_locked(list_no, n_entry + o);
426
+ update_entries(list_no, o, n_entry, ids, code);
427
+ locks->unlock_1(list_no);
452
428
  return o;
453
429
  }
454
430
 
455
- void OnDiskInvertedLists::resize (size_t list_no, size_t new_size)
456
- {
457
- FAISS_THROW_IF_NOT (!read_only);
458
- locks->lock_1 (list_no);
459
- resize_locked (list_no, new_size);
460
- locks->unlock_1 (list_no);
431
+ void OnDiskInvertedLists::resize(size_t list_no, size_t new_size) {
432
+ FAISS_THROW_IF_NOT(!read_only);
433
+ locks->lock_1(list_no);
434
+ resize_locked(list_no, new_size);
435
+ locks->unlock_1(list_no);
461
436
  }
462
437
 
438
+ void OnDiskInvertedLists::resize_locked(size_t list_no, size_t new_size) {
439
+ List& l = lists[list_no];
463
440
 
464
-
465
- void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
466
- {
467
- List & l = lists[list_no];
468
-
469
- if (new_size <= l.capacity &&
470
- new_size > l.capacity / 2) {
441
+ if (new_size <= l.capacity && new_size > l.capacity / 2) {
471
442
  l.size = new_size;
472
443
  return;
473
444
  }
474
445
 
475
446
  // otherwise we release the current slot, and find a new one
476
447
 
477
- locks->lock_2 ();
478
- free_slot (l.offset, l.capacity);
448
+ locks->lock_2();
449
+ free_slot(l.offset, l.capacity);
479
450
 
480
451
  List new_l;
481
452
 
@@ -487,25 +458,26 @@ void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
487
458
  while (new_l.capacity < new_size) {
488
459
  new_l.capacity *= 2;
489
460
  }
490
- new_l.offset = allocate_slot (
491
- new_l.capacity * (sizeof(idx_t) + code_size));
461
+ new_l.offset =
462
+ allocate_slot(new_l.capacity * (sizeof(idx_t) + code_size));
492
463
  }
493
464
 
494
465
  // copy common data
495
466
  if (l.offset != new_l.offset) {
496
- size_t n = std::min (new_size, l.size);
467
+ size_t n = std::min(new_size, l.size);
497
468
  if (n > 0) {
498
- memcpy (ptr + new_l.offset, get_codes(list_no), n * code_size);
499
- memcpy (ptr + new_l.offset + new_l.capacity * code_size,
500
- get_ids (list_no), n * sizeof(idx_t));
469
+ memcpy(ptr + new_l.offset, get_codes(list_no), n * code_size);
470
+ memcpy(ptr + new_l.offset + new_l.capacity * code_size,
471
+ get_ids(list_no),
472
+ n * sizeof(idx_t));
501
473
  }
502
474
  }
503
475
 
504
476
  lists[list_no] = new_l;
505
- locks->unlock_2 ();
477
+ locks->unlock_2();
506
478
  }
507
479
 
508
- size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
480
+ size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
509
481
  // should hold lock2
510
482
 
511
483
  auto it = slots.begin();
@@ -519,19 +491,19 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
519
491
  while (new_size - totsize < capacity) {
520
492
  new_size *= 2;
521
493
  }
522
- locks->lock_3 ();
494
+ locks->lock_3();
523
495
  update_totsize(new_size);
524
- locks->unlock_3 ();
496
+ locks->unlock_3();
525
497
  it = slots.begin();
526
498
  while (it != slots.end() && it->capacity < capacity) {
527
499
  it++;
528
500
  }
529
- assert (it != slots.end());
501
+ assert(it != slots.end());
530
502
  }
531
503
 
532
504
  size_t o = it->offset;
533
505
  if (it->capacity == capacity) {
534
- slots.erase (it);
506
+ slots.erase(it);
535
507
  } else {
536
508
  // take from beginning of slot
537
509
  it->capacity -= capacity;
@@ -541,12 +513,10 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
541
513
  return o;
542
514
  }
543
515
 
544
-
545
-
546
- void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
547
-
516
+ void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
548
517
  // should hold lock2
549
- if (capacity == 0) return;
518
+ if (capacity == 0)
519
+ return;
550
520
 
551
521
  auto it = slots.begin();
552
522
  while (it != slots.end() && it->offset <= offset) {
@@ -567,15 +537,15 @@ void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
567
537
  begin_next = it->offset;
568
538
  }
569
539
 
570
- assert (end_prev == inf || offset >= end_prev);
571
- assert (offset + capacity <= begin_next);
540
+ assert(end_prev == inf || offset >= end_prev);
541
+ assert(offset + capacity <= begin_next);
572
542
 
573
543
  if (offset == end_prev) {
574
544
  auto prev = it;
575
545
  prev--;
576
546
  if (offset + capacity == begin_next) {
577
547
  prev->capacity += capacity + it->capacity;
578
- slots.erase (it);
548
+ slots.erase(it);
579
549
  } else {
580
550
  prev->capacity += capacity;
581
551
  }
@@ -584,36 +554,37 @@ void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
584
554
  it->offset -= capacity;
585
555
  it->capacity += capacity;
586
556
  } else {
587
- slots.insert (it, Slot (offset, capacity));
557
+ slots.insert(it, Slot(offset, capacity));
588
558
  }
589
559
  }
590
560
 
591
561
  // TODO shrink global storage if needed
592
562
  }
593
563
 
594
-
595
564
  /*****************************************
596
565
  * Compact form
597
566
  *****************************************/
598
567
 
599
- size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
600
- bool verbose)
601
- {
602
- FAISS_THROW_IF_NOT_MSG (totsize == 0, "works only on an empty InvertedLists");
568
+ size_t OnDiskInvertedLists::merge_from(
569
+ const InvertedLists** ils,
570
+ int n_il,
571
+ bool verbose) {
572
+ FAISS_THROW_IF_NOT_MSG(
573
+ totsize == 0, "works only on an empty InvertedLists");
603
574
 
604
- std::vector<size_t> sizes (nlist);
575
+ std::vector<size_t> sizes(nlist);
605
576
  for (int i = 0; i < n_il; i++) {
606
- const InvertedLists *il = ils[i];
607
- FAISS_THROW_IF_NOT (il->nlist == nlist && il->code_size == code_size);
577
+ const InvertedLists* il = ils[i];
578
+ FAISS_THROW_IF_NOT(il->nlist == nlist && il->code_size == code_size);
608
579
 
609
- for (size_t j = 0; j < nlist; j++) {
610
- sizes [j] += il->list_size(j);
580
+ for (size_t j = 0; j < nlist; j++) {
581
+ sizes[j] += il->list_size(j);
611
582
  }
612
583
  }
613
584
 
614
585
  size_t cums = 0;
615
586
  size_t ntotal = 0;
616
- for (size_t j = 0; j < nlist; j++) {
587
+ for (size_t j = 0; j < nlist; j++) {
617
588
  ntotal += sizes[j];
618
589
  lists[j].size = 0;
619
590
  lists[j].capacity = sizes[j];
@@ -621,24 +592,26 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
621
592
  cums += lists[j].capacity * (sizeof(idx_t) + code_size);
622
593
  }
623
594
 
624
- update_totsize (cums);
625
-
595
+ update_totsize(cums);
626
596
 
627
597
  size_t nmerged = 0;
628
598
  double t0 = getmillisecs(), last_t = t0;
629
599
 
630
600
  #pragma omp parallel for
631
601
  for (size_t j = 0; j < nlist; j++) {
632
- List & l = lists[j];
602
+ List& l = lists[j];
633
603
  for (int i = 0; i < n_il; i++) {
634
- const InvertedLists *il = ils[i];
604
+ const InvertedLists* il = ils[i];
635
605
  size_t n_entry = il->list_size(j);
636
606
  l.size += n_entry;
637
- update_entries (j, l.size - n_entry, n_entry,
638
- ScopedIds(il, j).get(),
639
- ScopedCodes(il, j).get());
607
+ update_entries(
608
+ j,
609
+ l.size - n_entry,
610
+ n_entry,
611
+ ScopedIds(il, j).get(),
612
+ ScopedCodes(il, j).get());
640
613
  }
641
- assert (l.size == l.capacity);
614
+ assert(l.size == l.capacity);
642
615
  if (verbose) {
643
616
  #pragma omp critical
644
617
  {
@@ -646,72 +619,64 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
646
619
  double t1 = getmillisecs();
647
620
  if (t1 - last_t > 500) {
648
621
  printf("merged %zd lists in %.3f s\r",
649
- nmerged, (t1 - t0) / 1000.0);
622
+ nmerged,
623
+ (t1 - t0) / 1000.0);
650
624
  fflush(stdout);
651
625
  last_t = t1;
652
626
  }
653
627
  }
654
628
  }
655
629
  }
656
- if(verbose) {
630
+ if (verbose) {
657
631
  printf("\n");
658
632
  }
659
633
 
660
634
  return ntotal;
661
635
  }
662
636
 
663
-
664
- size_t OnDiskInvertedLists::merge_from_1 (const InvertedLists *ils, bool verbose)
665
- {
666
- return merge_from (&ils, 1, verbose);
637
+ size_t OnDiskInvertedLists::merge_from_1(
638
+ const InvertedLists* ils,
639
+ bool verbose) {
640
+ return merge_from(&ils, 1, verbose);
667
641
  }
668
642
 
669
-
670
- void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
671
- {
643
+ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1) {
672
644
  FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
673
645
 
674
- std::vector<List> new_lists (l1 - l0);
675
- memcpy (new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
646
+ std::vector<List> new_lists(l1 - l0);
647
+ memcpy(new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
676
648
 
677
649
  lists.swap(new_lists);
678
650
 
679
651
  nlist = l1 - l0;
680
652
  }
681
653
 
682
-
683
- void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
684
- {
654
+ void OnDiskInvertedLists::set_all_lists_sizes(const size_t* sizes) {
685
655
  size_t ofs = 0;
686
656
  for (size_t i = 0; i < nlist; i++) {
687
657
  lists[i].offset = ofs;
688
658
  lists[i].capacity = lists[i].size = sizes[i];
689
659
  ofs += sizes[i] * (sizeof(idx_t) + code_size);
690
660
  }
691
-
692
661
  }
693
662
 
694
663
  /*******************************************************
695
664
  * I/O support via callbacks
696
665
  *******************************************************/
697
666
 
698
-
699
-
700
-
701
- OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook():
702
- InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name())
703
- {}
704
-
705
-
706
- void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) const
707
- {
708
- uint32_t h = fourcc ("ilod");
709
- WRITE1 (h);
710
- WRITE1 (ils->nlist);
711
- WRITE1 (ils->code_size);
712
- const OnDiskInvertedLists *od = dynamic_cast<const OnDiskInvertedLists*> (ils);
667
+ OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook()
668
+ : InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name()) {}
669
+
670
+ void OnDiskInvertedListsIOHook::write(const InvertedLists* ils, IOWriter* f)
671
+ const {
672
+ uint32_t h = fourcc("ilod");
673
+ WRITE1(h);
674
+ WRITE1(ils->nlist);
675
+ WRITE1(ils->code_size);
676
+ const OnDiskInvertedLists* od =
677
+ dynamic_cast<const OnDiskInvertedLists*>(ils);
713
678
  // this is a POD object
714
- WRITEVECTOR (od->lists);
679
+ WRITEVECTOR(od->lists);
715
680
 
716
681
  {
717
682
  std::vector<OnDiskInvertedLists::Slot> v(
@@ -725,14 +690,14 @@ void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) con
725
690
  WRITE1(od->totsize);
726
691
  }
727
692
 
728
- InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
729
- {
730
- OnDiskInvertedLists *od = new OnDiskInvertedLists();
693
+ InvertedLists* OnDiskInvertedListsIOHook::read(IOReader* f, int io_flags)
694
+ const {
695
+ OnDiskInvertedLists* od = new OnDiskInvertedLists();
731
696
  od->read_only = io_flags & IO_FLAG_READ_ONLY;
732
- READ1 (od->nlist);
733
- READ1 (od->code_size);
697
+ READ1(od->nlist);
698
+ READ1(od->code_size);
734
699
  // this is a POD object
735
- READVECTOR (od->lists);
700
+ READVECTOR(od->lists);
736
701
  {
737
702
  std::vector<OnDiskInvertedLists::Slot> v;
738
703
  READVECTOR(v);
@@ -744,9 +709,10 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
744
709
  od->filename.assign(x.begin(), x.end());
745
710
 
746
711
  if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
747
- FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
748
- FAISS_THROW_IF_NOT_MSG (
749
- reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
712
+ FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
713
+ FAISS_THROW_IF_NOT_MSG(
714
+ reader,
715
+ "IO_FLAG_ONDISK_SAME_DIR only supported "
750
716
  "when reading from file");
751
717
  std::string indexname = reader->name;
752
718
  std::string dirname = "./";
@@ -762,10 +728,10 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
762
728
  filename = dirname + filename;
763
729
  printf("IO_FLAG_ONDISK_SAME_DIR: "
764
730
  "updating ondisk filename from %s to %s\n",
765
- od->filename.c_str(), filename.c_str());
731
+ od->filename.c_str(),
732
+ filename.c_str());
766
733
  od->filename = filename;
767
734
  }
768
-
769
735
  }
770
736
  READ1(od->totsize);
771
737
  if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
@@ -775,53 +741,51 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
775
741
  }
776
742
 
777
743
  /** read from a ArrayInvertedLists into this invertedlist type */
778
- InvertedLists * OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
779
- IOReader *f, int /* io_flags */,
780
- size_t nlist, size_t code_size,
781
- const std::vector<size_t> &sizes) const
782
- {
783
- auto ails = new OnDiskInvertedLists ();
744
+ InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
745
+ IOReader* f,
746
+ int /* io_flags */,
747
+ size_t nlist,
748
+ size_t code_size,
749
+ const std::vector<size_t>& sizes) const {
750
+ auto ails = new OnDiskInvertedLists();
784
751
  ails->nlist = nlist;
785
752
  ails->code_size = code_size;
786
753
  ails->read_only = true;
787
- ails->lists.resize (nlist);
754
+ ails->lists.resize(nlist);
788
755
 
789
- FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
756
+ FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
790
757
  FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
791
- FILE *fdesc = reader->f;
758
+ FILE* fdesc = reader->f;
792
759
  size_t o0 = ftell(fdesc);
793
760
  size_t o = o0;
794
761
  { // do the mmap
795
762
  struct stat buf;
796
- int ret = fstat (fileno(fdesc), &buf);
797
- FAISS_THROW_IF_NOT_FMT (ret == 0,
798
- "fstat failed: %s", strerror(errno));
763
+ int ret = fstat(fileno(fdesc), &buf);
764
+ FAISS_THROW_IF_NOT_FMT(ret == 0, "fstat failed: %s", strerror(errno));
799
765
  ails->totsize = buf.st_size;
800
- ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
801
- PROT_READ, MAP_SHARED,
802
- fileno(fdesc), 0);
803
- FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
804
- "could not mmap: %s",
805
- strerror(errno));
766
+ ails->ptr = (uint8_t*)mmap(
767
+ nullptr,
768
+ ails->totsize,
769
+ PROT_READ,
770
+ MAP_SHARED,
771
+ fileno(fdesc),
772
+ 0);
773
+ FAISS_THROW_IF_NOT_FMT(
774
+ ails->ptr != MAP_FAILED, "could not mmap: %s", strerror(errno));
806
775
  }
807
776
 
808
777
  FAISS_THROW_IF_NOT(o <= ails->totsize);
809
778
 
810
779
  for (size_t i = 0; i < ails->nlist; i++) {
811
- OnDiskInvertedLists::List & l = ails->lists[i];
780
+ OnDiskInvertedLists::List& l = ails->lists[i];
812
781
  l.size = l.capacity = sizes[i];
813
782
  l.offset = o;
814
- o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
815
- ails->code_size);
783
+ o += l.size * (sizeof(OnDiskInvertedLists::idx_t) + ails->code_size);
816
784
  }
817
785
  // resume normal reading of file
818
- fseek (fdesc, o, SEEK_SET);
786
+ fseek(fdesc, o, SEEK_SET);
819
787
 
820
788
  return ails;
821
789
  }
822
790
 
823
-
824
-
825
-
826
-
827
791
  } // namespace faiss