faiss 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +7 -7
  5. data/ext/faiss/extconf.rb +6 -3
  6. data/ext/faiss/numo.hpp +4 -4
  7. data/ext/faiss/utils.cpp +1 -1
  8. data/ext/faiss/utils.h +1 -1
  9. data/lib/faiss/version.rb +1 -1
  10. data/vendor/faiss/faiss/AutoTune.cpp +292 -291
  11. data/vendor/faiss/faiss/AutoTune.h +55 -56
  12. data/vendor/faiss/faiss/Clustering.cpp +365 -194
  13. data/vendor/faiss/faiss/Clustering.h +102 -35
  14. data/vendor/faiss/faiss/IVFlib.cpp +171 -195
  15. data/vendor/faiss/faiss/IVFlib.h +48 -51
  16. data/vendor/faiss/faiss/Index.cpp +85 -103
  17. data/vendor/faiss/faiss/Index.h +54 -48
  18. data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
  19. data/vendor/faiss/faiss/Index2Layer.h +22 -36
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  21. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
  22. data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
  23. data/vendor/faiss/faiss/IndexBinary.h +140 -132
  24. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
  25. data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
  26. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
  27. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
  28. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
  29. data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
  30. data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
  31. data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
  32. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
  33. data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
  34. data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
  35. data/vendor/faiss/faiss/IndexFlat.h +42 -59
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
  39. data/vendor/faiss/faiss/IndexHNSW.h +57 -41
  40. data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
  41. data/vendor/faiss/faiss/IndexIVF.h +169 -118
  42. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  43. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  44. data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
  45. data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
  50. data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
  51. data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
  52. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
  53. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
  54. data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
  55. data/vendor/faiss/faiss/IndexLSH.h +20 -38
  56. data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
  57. data/vendor/faiss/faiss/IndexLattice.h +11 -16
  58. data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
  59. data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
  60. data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
  61. data/vendor/faiss/faiss/IndexNSG.h +85 -0
  62. data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
  63. data/vendor/faiss/faiss/IndexPQ.h +64 -82
  64. data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
  65. data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
  66. data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
  67. data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
  68. data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
  69. data/vendor/faiss/faiss/IndexRefine.h +32 -23
  70. data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
  71. data/vendor/faiss/faiss/IndexReplicas.h +62 -56
  72. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
  73. data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
  74. data/vendor/faiss/faiss/IndexShards.cpp +256 -240
  75. data/vendor/faiss/faiss/IndexShards.h +85 -73
  76. data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
  77. data/vendor/faiss/faiss/MatrixStats.h +7 -10
  78. data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
  79. data/vendor/faiss/faiss/MetaIndexes.h +40 -34
  80. data/vendor/faiss/faiss/MetricType.h +7 -7
  81. data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
  82. data/vendor/faiss/faiss/VectorTransform.h +64 -89
  83. data/vendor/faiss/faiss/clone_index.cpp +78 -73
  84. data/vendor/faiss/faiss/clone_index.h +4 -9
  85. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
  86. data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
  87. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
  88. data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
  89. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
  90. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
  91. data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
  92. data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
  93. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  94. data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
  95. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
  96. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
  97. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
  98. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
  99. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
  100. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
  101. data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
  102. data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
  103. data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
  104. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
  105. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
  106. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
  107. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
  108. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
  109. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
  110. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
  111. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
  112. data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
  113. data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
  114. data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
  115. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
  116. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
  117. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
  118. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
  119. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
  120. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
  121. data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
  122. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
  123. data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
  124. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
  125. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
  126. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
  127. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
  128. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
  129. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
  130. data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
  131. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
  132. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
  133. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
  134. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
  135. data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
  136. data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
  137. data/vendor/faiss/faiss/impl/FaissException.h +41 -29
  138. data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
  139. data/vendor/faiss/faiss/impl/HNSW.h +179 -200
  140. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
  141. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
  142. data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
  143. data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
  144. data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
  145. data/vendor/faiss/faiss/impl/NSG.h +199 -0
  146. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
  147. data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
  148. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
  149. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
  150. data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
  151. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
  152. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
  153. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
  154. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
  155. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
  156. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
  157. data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
  158. data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
  159. data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
  160. data/vendor/faiss/faiss/impl/io.cpp +76 -95
  161. data/vendor/faiss/faiss/impl/io.h +31 -41
  162. data/vendor/faiss/faiss/impl/io_macros.h +60 -29
  163. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  164. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  165. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
  166. data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
  167. data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
  168. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
  169. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
  170. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
  171. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
  172. data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
  173. data/vendor/faiss/faiss/index_factory.cpp +619 -397
  174. data/vendor/faiss/faiss/index_factory.h +8 -6
  175. data/vendor/faiss/faiss/index_io.h +23 -26
  176. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
  177. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
  178. data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
  179. data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
  180. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
  181. data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
  182. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
  183. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
  184. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
  185. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
  186. data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
  187. data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
  188. data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
  189. data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
  190. data/vendor/faiss/faiss/utils/Heap.h +186 -209
  191. data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
  192. data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
  193. data/vendor/faiss/faiss/utils/distances.cpp +305 -312
  194. data/vendor/faiss/faiss/utils/distances.h +170 -122
  195. data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
  196. data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
  197. data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
  198. data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
  199. data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
  200. data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
  201. data/vendor/faiss/faiss/utils/hamming.h +62 -85
  202. data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
  203. data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
  204. data/vendor/faiss/faiss/utils/partitioning.h +26 -21
  205. data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
  206. data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
  207. data/vendor/faiss/faiss/utils/random.cpp +39 -63
  208. data/vendor/faiss/faiss/utils/random.h +13 -16
  209. data/vendor/faiss/faiss/utils/simdlib.h +4 -2
  210. data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
  211. data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
  212. data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
  213. data/vendor/faiss/faiss/utils/utils.cpp +304 -287
  214. data/vendor/faiss/faiss/utils/utils.h +54 -49
  215. metadata +29 -4
@@ -14,9 +14,9 @@
14
14
  #include <unordered_set>
15
15
 
16
16
  #include <sys/mman.h>
17
- #include <unistd.h>
18
- #include <sys/types.h>
19
17
  #include <sys/stat.h>
18
+ #include <sys/types.h>
19
+ #include <unistd.h>
20
20
 
21
21
  #include <faiss/impl/FaissAssert.h>
22
22
  #include <faiss/utils/utils.h>
@@ -24,15 +24,12 @@
24
24
  #include <faiss/impl/io.h>
25
25
  #include <faiss/impl/io_macros.h>
26
26
 
27
-
28
27
  namespace faiss {
29
28
 
30
-
31
29
  /**********************************************
32
30
  * LockLevels
33
31
  **********************************************/
34
32
 
35
-
36
33
  struct LockLevels {
37
34
  /* There n times lock1(n), one lock2 and one lock3
38
35
  * Invariants:
@@ -47,8 +44,8 @@ struct LockLevels {
47
44
  pthread_cond_t level3_cv;
48
45
 
49
46
  std::unordered_set<int> level1_holders; // which level1 locks are held
50
- int n_level2; // nb threads that wait on level2
51
- bool level3_in_use; // a threads waits on level3
47
+ int n_level2; // nb threads that wait on level2
48
+ bool level3_in_use; // a threads waits on level3
52
49
  bool level2_in_use;
53
50
 
54
51
  LockLevels() {
@@ -91,7 +88,7 @@ struct LockLevels {
91
88
 
92
89
  void lock_2() {
93
90
  pthread_mutex_lock(&mutex1);
94
- n_level2 ++;
91
+ n_level2++;
95
92
  if (level3_in_use) { // tell waiting level3 that we are blocked
96
93
  pthread_cond_signal(&level3_cv);
97
94
  }
@@ -105,7 +102,7 @@ struct LockLevels {
105
102
  void unlock_2() {
106
103
  pthread_mutex_lock(&mutex1);
107
104
  level2_in_use = false;
108
- n_level2 --;
105
+ n_level2--;
109
106
  pthread_cond_signal(&level2_cv);
110
107
  pthread_mutex_unlock(&mutex1);
111
108
  }
@@ -128,17 +125,17 @@ struct LockLevels {
128
125
  pthread_mutex_unlock(&mutex1);
129
126
  }
130
127
 
131
- void print () {
128
+ void print() {
132
129
  pthread_mutex_lock(&mutex1);
133
130
  printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
134
- int(level3_in_use), n_level2);
131
+ int(level3_in_use),
132
+ n_level2);
135
133
  for (int k : level1_holders) {
136
134
  printf("%d ", k);
137
135
  }
138
136
  printf("]\n");
139
137
  pthread_mutex_unlock(&mutex1);
140
138
  }
141
-
142
139
  };
143
140
 
144
141
  /**********************************************
@@ -146,27 +143,27 @@ struct LockLevels {
146
143
  **********************************************/
147
144
 
148
145
  struct OnDiskInvertedLists::OngoingPrefetch {
149
-
150
146
  struct Thread {
151
147
  pthread_t pth;
152
- OngoingPrefetch *pf;
148
+ OngoingPrefetch* pf;
153
149
 
154
- bool one_list () {
150
+ bool one_list() {
155
151
  idx_t list_no = pf->get_next_list();
156
- if(list_no == -1) return false;
157
- const OnDiskInvertedLists *od = pf->od;
158
- od->locks->lock_1 (list_no);
159
- size_t n = od->list_size (list_no);
160
- const Index::idx_t *idx = od->get_ids (list_no);
161
- const uint8_t *codes = od->get_codes (list_no);
152
+ if (list_no == -1)
153
+ return false;
154
+ const OnDiskInvertedLists* od = pf->od;
155
+ od->locks->lock_1(list_no);
156
+ size_t n = od->list_size(list_no);
157
+ const Index::idx_t* idx = od->get_ids(list_no);
158
+ const uint8_t* codes = od->get_codes(list_no);
162
159
  int cs = 0;
163
- for (size_t i = 0; i < n;i++) {
160
+ for (size_t i = 0; i < n; i++) {
164
161
  cs += idx[i];
165
162
  }
166
- const idx_t *codes8 = (const idx_t*)codes;
163
+ const idx_t* codes8 = (const idx_t*)codes;
167
164
  idx_t n8 = n * od->code_size / 8;
168
165
 
169
- for (size_t i = 0; i < n8;i++) {
166
+ for (size_t i = 0; i < n8; i++) {
170
167
  cs += codes8[i];
171
168
  }
172
169
  od->locks->unlock_1(list_no);
@@ -174,7 +171,6 @@ struct OnDiskInvertedLists::OngoingPrefetch {
174
171
  global_cs += cs & 1;
175
172
  return true;
176
173
  }
177
-
178
174
  };
179
175
 
180
176
  std::vector<Thread> threads;
@@ -189,125 +185,123 @@ struct OnDiskInvertedLists::OngoingPrefetch {
189
185
  // pretext to avoid code below to be optimized out
190
186
  static int global_cs;
191
187
 
192
- const OnDiskInvertedLists *od;
188
+ const OnDiskInvertedLists* od;
193
189
 
194
- explicit OngoingPrefetch (const OnDiskInvertedLists *od): od (od)
195
- {
196
- pthread_mutex_init (&mutex, nullptr);
197
- pthread_mutex_init (&list_ids_mutex, nullptr);
190
+ explicit OngoingPrefetch(const OnDiskInvertedLists* od) : od(od) {
191
+ pthread_mutex_init(&mutex, nullptr);
192
+ pthread_mutex_init(&list_ids_mutex, nullptr);
198
193
  cur_list = 0;
199
194
  }
200
195
 
201
- static void* prefetch_list (void * arg) {
202
- Thread *th = static_cast<Thread*>(arg);
196
+ static void* prefetch_list(void* arg) {
197
+ Thread* th = static_cast<Thread*>(arg);
203
198
 
204
- while (th->one_list()) ;
199
+ while (th->one_list())
200
+ ;
205
201
 
206
202
  return nullptr;
207
203
  }
208
204
 
209
- idx_t get_next_list () {
205
+ idx_t get_next_list() {
210
206
  idx_t list_no = -1;
211
- pthread_mutex_lock (&list_ids_mutex);
207
+ pthread_mutex_lock(&list_ids_mutex);
212
208
  if (cur_list >= 0 && cur_list < list_ids.size()) {
213
209
  list_no = list_ids[cur_list++];
214
210
  }
215
- pthread_mutex_unlock (&list_ids_mutex);
211
+ pthread_mutex_unlock(&list_ids_mutex);
216
212
  return list_no;
217
213
  }
218
214
 
219
- void prefetch_lists (const idx_t *list_nos, int n) {
220
- pthread_mutex_lock (&mutex);
221
- pthread_mutex_lock (&list_ids_mutex);
222
- list_ids.clear ();
223
- pthread_mutex_unlock (&list_ids_mutex);
224
- for (auto &th: threads) {
225
- pthread_join (th.pth, nullptr);
215
+ void prefetch_lists(const idx_t* list_nos, int n) {
216
+ pthread_mutex_lock(&mutex);
217
+ pthread_mutex_lock(&list_ids_mutex);
218
+ list_ids.clear();
219
+ pthread_mutex_unlock(&list_ids_mutex);
220
+ for (auto& th : threads) {
221
+ pthread_join(th.pth, nullptr);
226
222
  }
227
223
 
228
- threads.resize (0);
224
+ threads.resize(0);
229
225
  cur_list = 0;
230
- int nt = std::min (n, od->prefetch_nthread);
226
+ int nt = std::min(n, od->prefetch_nthread);
231
227
 
232
228
  if (nt > 0) {
233
229
  // prepare tasks
234
230
  for (int i = 0; i < n; i++) {
235
231
  idx_t list_no = list_nos[i];
236
232
  if (list_no >= 0 && od->list_size(list_no) > 0) {
237
- list_ids.push_back (list_no);
233
+ list_ids.push_back(list_no);
238
234
  }
239
235
  }
240
236
  // prepare threads
241
- threads.resize (nt);
242
- for (Thread &th: threads) {
237
+ threads.resize(nt);
238
+ for (Thread& th : threads) {
243
239
  th.pf = this;
244
- pthread_create (&th.pth, nullptr, prefetch_list, &th);
240
+ pthread_create(&th.pth, nullptr, prefetch_list, &th);
245
241
  }
246
242
  }
247
- pthread_mutex_unlock (&mutex);
243
+ pthread_mutex_unlock(&mutex);
248
244
  }
249
245
 
250
- ~OngoingPrefetch () {
251
- pthread_mutex_lock (&mutex);
252
- for (auto &th: threads) {
253
- pthread_join (th.pth, nullptr);
246
+ ~OngoingPrefetch() {
247
+ pthread_mutex_lock(&mutex);
248
+ for (auto& th : threads) {
249
+ pthread_join(th.pth, nullptr);
254
250
  }
255
- pthread_mutex_unlock (&mutex);
256
- pthread_mutex_destroy (&mutex);
257
- pthread_mutex_destroy (&list_ids_mutex);
251
+ pthread_mutex_unlock(&mutex);
252
+ pthread_mutex_destroy(&mutex);
253
+ pthread_mutex_destroy(&list_ids_mutex);
258
254
  }
259
-
260
255
  };
261
256
 
262
257
  int OnDiskInvertedLists::OngoingPrefetch::global_cs = 0;
263
258
 
264
-
265
- void OnDiskInvertedLists::prefetch_lists (const idx_t *list_nos, int n) const
266
- {
267
- pf->prefetch_lists (list_nos, n);
259
+ void OnDiskInvertedLists::prefetch_lists(const idx_t* list_nos, int n) const {
260
+ pf->prefetch_lists(list_nos, n);
268
261
  }
269
262
 
270
-
271
-
272
263
  /**********************************************
273
264
  * OnDiskInvertedLists: mmapping
274
265
  **********************************************/
275
266
 
276
-
277
- void OnDiskInvertedLists::do_mmap ()
278
- {
279
- const char *rw_flags = read_only ? "r" : "r+";
267
+ void OnDiskInvertedLists::do_mmap() {
268
+ const char* rw_flags = read_only ? "r" : "r+";
280
269
  int prot = read_only ? PROT_READ : PROT_WRITE | PROT_READ;
281
- FILE *f = fopen (filename.c_str(), rw_flags);
282
- FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode %s: %s",
283
- filename.c_str(), rw_flags, strerror(errno));
284
-
285
- uint8_t * ptro = (uint8_t*)mmap (nullptr, totsize,
286
- prot, MAP_SHARED, fileno (f), 0);
287
-
288
- FAISS_THROW_IF_NOT_FMT (ptro != MAP_FAILED,
289
- "could not mmap %s: %s",
290
- filename.c_str(),
291
- strerror(errno));
270
+ FILE* f = fopen(filename.c_str(), rw_flags);
271
+ FAISS_THROW_IF_NOT_FMT(
272
+ f,
273
+ "could not open %s in mode %s: %s",
274
+ filename.c_str(),
275
+ rw_flags,
276
+ strerror(errno));
277
+
278
+ uint8_t* ptro =
279
+ (uint8_t*)mmap(nullptr, totsize, prot, MAP_SHARED, fileno(f), 0);
280
+
281
+ FAISS_THROW_IF_NOT_FMT(
282
+ ptro != MAP_FAILED,
283
+ "could not mmap %s: %s",
284
+ filename.c_str(),
285
+ strerror(errno));
292
286
  ptr = ptro;
293
- fclose (f);
294
-
287
+ fclose(f);
295
288
  }
296
289
 
297
- void OnDiskInvertedLists::update_totsize (size_t new_size)
298
- {
299
-
290
+ void OnDiskInvertedLists::update_totsize(size_t new_size) {
300
291
  // unmap file
301
292
  if (ptr != nullptr) {
302
- int err = munmap (ptr, totsize);
303
- FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s", strerror(errno));
293
+ int err = munmap(ptr, totsize);
294
+ FAISS_THROW_IF_NOT_FMT(err == 0, "munmap error: %s", strerror(errno));
304
295
  }
305
296
  if (totsize == 0) {
306
297
  // must create file before truncating it
307
- FILE *f = fopen (filename.c_str(), "w");
308
- FAISS_THROW_IF_NOT_FMT (f, "could not open %s in mode W: %s",
309
- filename.c_str(), strerror(errno));
310
- fclose (f);
298
+ FILE* f = fopen(filename.c_str(), "w");
299
+ FAISS_THROW_IF_NOT_FMT(
300
+ f,
301
+ "could not open %s in mode W: %s",
302
+ filename.c_str(),
303
+ strerror(errno));
304
+ fclose(f);
311
305
  }
312
306
 
313
307
  if (new_size > totsize) {
@@ -315,7 +309,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
315
309
  slots.back().offset + slots.back().capacity == totsize) {
316
310
  slots.back().capacity += new_size - totsize;
317
311
  } else {
318
- slots.push_back (Slot(totsize, new_size - totsize));
312
+ slots.push_back(Slot(totsize, new_size - totsize));
319
313
  }
320
314
  } else {
321
315
  assert(!"not implemented");
@@ -324,89 +318,69 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
324
318
  totsize = new_size;
325
319
 
326
320
  // create file
327
- printf ("resizing %s to %zd bytes\n", filename.c_str(), totsize);
321
+ printf("resizing %s to %zd bytes\n", filename.c_str(), totsize);
328
322
 
329
- int err = truncate (filename.c_str(), totsize);
323
+ int err = truncate(filename.c_str(), totsize);
330
324
 
331
- FAISS_THROW_IF_NOT_FMT (err == 0, "truncate %s to %ld: %s",
332
- filename.c_str(), totsize,
333
- strerror(errno));
334
- do_mmap ();
325
+ FAISS_THROW_IF_NOT_FMT(
326
+ err == 0,
327
+ "truncate %s to %ld: %s",
328
+ filename.c_str(),
329
+ totsize,
330
+ strerror(errno));
331
+ do_mmap();
335
332
  }
336
333
 
337
-
338
-
339
-
340
-
341
-
342
334
  /**********************************************
343
335
  * OnDiskInvertedLists
344
336
  **********************************************/
345
337
 
346
338
  #define INVALID_OFFSET (size_t)(-1)
347
339
 
348
- OnDiskOneList::OnDiskOneList ():
349
- size (0), capacity (0), offset (INVALID_OFFSET)
350
- {}
351
-
352
- OnDiskInvertedLists::Slot::Slot (size_t offset, size_t capacity):
353
- offset (offset), capacity (capacity)
354
- {}
340
+ OnDiskOneList::OnDiskOneList() : size(0), capacity(0), offset(INVALID_OFFSET) {}
355
341
 
356
- OnDiskInvertedLists::Slot::Slot ():
357
- offset (0), capacity (0)
358
- {}
342
+ OnDiskInvertedLists::Slot::Slot(size_t offset, size_t capacity)
343
+ : offset(offset), capacity(capacity) {}
359
344
 
345
+ OnDiskInvertedLists::Slot::Slot() : offset(0), capacity(0) {}
360
346
 
361
-
362
- OnDiskInvertedLists::OnDiskInvertedLists (
363
- size_t nlist, size_t code_size,
364
- const char *filename):
365
- InvertedLists (nlist, code_size),
366
- filename (filename),
367
- totsize (0),
368
- ptr (nullptr),
369
- read_only (false),
370
- locks (new LockLevels ()),
371
- pf (new OngoingPrefetch (this)),
372
- prefetch_nthread (32)
373
- {
374
- lists.resize (nlist);
347
+ OnDiskInvertedLists::OnDiskInvertedLists(
348
+ size_t nlist,
349
+ size_t code_size,
350
+ const char* filename)
351
+ : InvertedLists(nlist, code_size),
352
+ filename(filename),
353
+ totsize(0),
354
+ ptr(nullptr),
355
+ read_only(false),
356
+ locks(new LockLevels()),
357
+ pf(new OngoingPrefetch(this)),
358
+ prefetch_nthread(32) {
359
+ lists.resize(nlist);
375
360
 
376
361
  // slots starts empty
377
362
  }
378
363
 
379
- OnDiskInvertedLists::OnDiskInvertedLists ():
380
- OnDiskInvertedLists (0, 0, "")
381
- {
382
- }
364
+ OnDiskInvertedLists::OnDiskInvertedLists() : OnDiskInvertedLists(0, 0, "") {}
383
365
 
384
- OnDiskInvertedLists::~OnDiskInvertedLists ()
385
- {
366
+ OnDiskInvertedLists::~OnDiskInvertedLists() {
386
367
  delete pf;
387
368
 
388
369
  // unmap all lists
389
370
  if (ptr != nullptr) {
390
- int err = munmap (ptr, totsize);
371
+ int err = munmap(ptr, totsize);
391
372
  if (err != 0) {
392
- fprintf(stderr, "mumap error: %s",
393
- strerror(errno));
373
+ fprintf(stderr, "mumap error: %s", strerror(errno));
394
374
  }
395
375
  }
396
376
  delete locks;
397
377
  }
398
378
 
399
-
400
-
401
-
402
- size_t OnDiskInvertedLists::list_size(size_t list_no) const
403
- {
379
+ size_t OnDiskInvertedLists::list_size(size_t list_no) const {
404
380
  return lists[list_no].size;
405
381
  }
406
382
 
407
-
408
- const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
409
- {
383
+ const uint8_t* OnDiskInvertedLists::get_codes(size_t list_no) const {
410
384
  if (lists[list_no].offset == INVALID_OFFSET) {
411
385
  return nullptr;
412
386
  }
@@ -414,68 +388,65 @@ const uint8_t * OnDiskInvertedLists::get_codes (size_t list_no) const
414
388
  return ptr + lists[list_no].offset;
415
389
  }
416
390
 
417
- const Index::idx_t * OnDiskInvertedLists::get_ids (size_t list_no) const
418
- {
391
+ const Index::idx_t* OnDiskInvertedLists::get_ids(size_t list_no) const {
419
392
  if (lists[list_no].offset == INVALID_OFFSET) {
420
393
  return nullptr;
421
394
  }
422
395
 
423
- return (const idx_t*)(ptr + lists[list_no].offset +
424
- code_size * lists[list_no].capacity);
396
+ return (
397
+ const idx_t*)(ptr + lists[list_no].offset + code_size * lists[list_no].capacity);
425
398
  }
426
399
 
427
-
428
- void OnDiskInvertedLists::update_entries (
429
- size_t list_no, size_t offset, size_t n_entry,
430
- const idx_t *ids_in, const uint8_t *codes_in)
431
- {
432
- FAISS_THROW_IF_NOT (!read_only);
433
- if (n_entry == 0) return;
434
- const List & l = lists[list_no];
435
- assert (n_entry + offset <= l.size);
436
- idx_t *ids = const_cast<idx_t*>(get_ids (list_no));
437
- memcpy (ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
438
- uint8_t *codes = const_cast<uint8_t*>(get_codes (list_no));
439
- memcpy (codes + offset * code_size, codes_in, code_size * n_entry);
400
+ void OnDiskInvertedLists::update_entries(
401
+ size_t list_no,
402
+ size_t offset,
403
+ size_t n_entry,
404
+ const idx_t* ids_in,
405
+ const uint8_t* codes_in) {
406
+ FAISS_THROW_IF_NOT(!read_only);
407
+ if (n_entry == 0)
408
+ return;
409
+ const List& l = lists[list_no];
410
+ assert(n_entry + offset <= l.size);
411
+ idx_t* ids = const_cast<idx_t*>(get_ids(list_no));
412
+ memcpy(ids + offset, ids_in, sizeof(ids_in[0]) * n_entry);
413
+ uint8_t* codes = const_cast<uint8_t*>(get_codes(list_no));
414
+ memcpy(codes + offset * code_size, codes_in, code_size * n_entry);
440
415
  }
441
416
 
442
- size_t OnDiskInvertedLists::add_entries (
443
- size_t list_no, size_t n_entry,
444
- const idx_t* ids, const uint8_t *code)
445
- {
446
- FAISS_THROW_IF_NOT (!read_only);
447
- locks->lock_1 (list_no);
448
- size_t o = list_size (list_no);
449
- resize_locked (list_no, n_entry + o);
450
- update_entries (list_no, o, n_entry, ids, code);
451
- locks->unlock_1 (list_no);
417
+ size_t OnDiskInvertedLists::add_entries(
418
+ size_t list_no,
419
+ size_t n_entry,
420
+ const idx_t* ids,
421
+ const uint8_t* code) {
422
+ FAISS_THROW_IF_NOT(!read_only);
423
+ locks->lock_1(list_no);
424
+ size_t o = list_size(list_no);
425
+ resize_locked(list_no, n_entry + o);
426
+ update_entries(list_no, o, n_entry, ids, code);
427
+ locks->unlock_1(list_no);
452
428
  return o;
453
429
  }
454
430
 
455
- void OnDiskInvertedLists::resize (size_t list_no, size_t new_size)
456
- {
457
- FAISS_THROW_IF_NOT (!read_only);
458
- locks->lock_1 (list_no);
459
- resize_locked (list_no, new_size);
460
- locks->unlock_1 (list_no);
431
+ void OnDiskInvertedLists::resize(size_t list_no, size_t new_size) {
432
+ FAISS_THROW_IF_NOT(!read_only);
433
+ locks->lock_1(list_no);
434
+ resize_locked(list_no, new_size);
435
+ locks->unlock_1(list_no);
461
436
  }
462
437
 
438
+ void OnDiskInvertedLists::resize_locked(size_t list_no, size_t new_size) {
439
+ List& l = lists[list_no];
463
440
 
464
-
465
- void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
466
- {
467
- List & l = lists[list_no];
468
-
469
- if (new_size <= l.capacity &&
470
- new_size > l.capacity / 2) {
441
+ if (new_size <= l.capacity && new_size > l.capacity / 2) {
471
442
  l.size = new_size;
472
443
  return;
473
444
  }
474
445
 
475
446
  // otherwise we release the current slot, and find a new one
476
447
 
477
- locks->lock_2 ();
478
- free_slot (l.offset, l.capacity);
448
+ locks->lock_2();
449
+ free_slot(l.offset, l.capacity);
479
450
 
480
451
  List new_l;
481
452
 
@@ -487,25 +458,26 @@ void OnDiskInvertedLists::resize_locked (size_t list_no, size_t new_size)
487
458
  while (new_l.capacity < new_size) {
488
459
  new_l.capacity *= 2;
489
460
  }
490
- new_l.offset = allocate_slot (
491
- new_l.capacity * (sizeof(idx_t) + code_size));
461
+ new_l.offset =
462
+ allocate_slot(new_l.capacity * (sizeof(idx_t) + code_size));
492
463
  }
493
464
 
494
465
  // copy common data
495
466
  if (l.offset != new_l.offset) {
496
- size_t n = std::min (new_size, l.size);
467
+ size_t n = std::min(new_size, l.size);
497
468
  if (n > 0) {
498
- memcpy (ptr + new_l.offset, get_codes(list_no), n * code_size);
499
- memcpy (ptr + new_l.offset + new_l.capacity * code_size,
500
- get_ids (list_no), n * sizeof(idx_t));
469
+ memcpy(ptr + new_l.offset, get_codes(list_no), n * code_size);
470
+ memcpy(ptr + new_l.offset + new_l.capacity * code_size,
471
+ get_ids(list_no),
472
+ n * sizeof(idx_t));
501
473
  }
502
474
  }
503
475
 
504
476
  lists[list_no] = new_l;
505
- locks->unlock_2 ();
477
+ locks->unlock_2();
506
478
  }
507
479
 
508
- size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
480
+ size_t OnDiskInvertedLists::allocate_slot(size_t capacity) {
509
481
  // should hold lock2
510
482
 
511
483
  auto it = slots.begin();
@@ -519,19 +491,19 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
519
491
  while (new_size - totsize < capacity) {
520
492
  new_size *= 2;
521
493
  }
522
- locks->lock_3 ();
494
+ locks->lock_3();
523
495
  update_totsize(new_size);
524
- locks->unlock_3 ();
496
+ locks->unlock_3();
525
497
  it = slots.begin();
526
498
  while (it != slots.end() && it->capacity < capacity) {
527
499
  it++;
528
500
  }
529
- assert (it != slots.end());
501
+ assert(it != slots.end());
530
502
  }
531
503
 
532
504
  size_t o = it->offset;
533
505
  if (it->capacity == capacity) {
534
- slots.erase (it);
506
+ slots.erase(it);
535
507
  } else {
536
508
  // take from beginning of slot
537
509
  it->capacity -= capacity;
@@ -541,12 +513,10 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
541
513
  return o;
542
514
  }
543
515
 
544
-
545
-
546
- void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
547
-
516
+ void OnDiskInvertedLists::free_slot(size_t offset, size_t capacity) {
548
517
  // should hold lock2
549
- if (capacity == 0) return;
518
+ if (capacity == 0)
519
+ return;
550
520
 
551
521
  auto it = slots.begin();
552
522
  while (it != slots.end() && it->offset <= offset) {
@@ -567,15 +537,15 @@ void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
567
537
  begin_next = it->offset;
568
538
  }
569
539
 
570
- assert (end_prev == inf || offset >= end_prev);
571
- assert (offset + capacity <= begin_next);
540
+ assert(end_prev == inf || offset >= end_prev);
541
+ assert(offset + capacity <= begin_next);
572
542
 
573
543
  if (offset == end_prev) {
574
544
  auto prev = it;
575
545
  prev--;
576
546
  if (offset + capacity == begin_next) {
577
547
  prev->capacity += capacity + it->capacity;
578
- slots.erase (it);
548
+ slots.erase(it);
579
549
  } else {
580
550
  prev->capacity += capacity;
581
551
  }
@@ -584,36 +554,37 @@ void OnDiskInvertedLists::free_slot (size_t offset, size_t capacity) {
584
554
  it->offset -= capacity;
585
555
  it->capacity += capacity;
586
556
  } else {
587
- slots.insert (it, Slot (offset, capacity));
557
+ slots.insert(it, Slot(offset, capacity));
588
558
  }
589
559
  }
590
560
 
591
561
  // TODO shrink global storage if needed
592
562
  }
593
563
 
594
-
595
564
  /*****************************************
596
565
  * Compact form
597
566
  *****************************************/
598
567
 
599
- size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
600
- bool verbose)
601
- {
602
- FAISS_THROW_IF_NOT_MSG (totsize == 0, "works only on an empty InvertedLists");
568
+ size_t OnDiskInvertedLists::merge_from(
569
+ const InvertedLists** ils,
570
+ int n_il,
571
+ bool verbose) {
572
+ FAISS_THROW_IF_NOT_MSG(
573
+ totsize == 0, "works only on an empty InvertedLists");
603
574
 
604
- std::vector<size_t> sizes (nlist);
575
+ std::vector<size_t> sizes(nlist);
605
576
  for (int i = 0; i < n_il; i++) {
606
- const InvertedLists *il = ils[i];
607
- FAISS_THROW_IF_NOT (il->nlist == nlist && il->code_size == code_size);
577
+ const InvertedLists* il = ils[i];
578
+ FAISS_THROW_IF_NOT(il->nlist == nlist && il->code_size == code_size);
608
579
 
609
- for (size_t j = 0; j < nlist; j++) {
610
- sizes [j] += il->list_size(j);
580
+ for (size_t j = 0; j < nlist; j++) {
581
+ sizes[j] += il->list_size(j);
611
582
  }
612
583
  }
613
584
 
614
585
  size_t cums = 0;
615
586
  size_t ntotal = 0;
616
- for (size_t j = 0; j < nlist; j++) {
587
+ for (size_t j = 0; j < nlist; j++) {
617
588
  ntotal += sizes[j];
618
589
  lists[j].size = 0;
619
590
  lists[j].capacity = sizes[j];
@@ -621,24 +592,26 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
621
592
  cums += lists[j].capacity * (sizeof(idx_t) + code_size);
622
593
  }
623
594
 
624
- update_totsize (cums);
625
-
595
+ update_totsize(cums);
626
596
 
627
597
  size_t nmerged = 0;
628
598
  double t0 = getmillisecs(), last_t = t0;
629
599
 
630
600
  #pragma omp parallel for
631
601
  for (size_t j = 0; j < nlist; j++) {
632
- List & l = lists[j];
602
+ List& l = lists[j];
633
603
  for (int i = 0; i < n_il; i++) {
634
- const InvertedLists *il = ils[i];
604
+ const InvertedLists* il = ils[i];
635
605
  size_t n_entry = il->list_size(j);
636
606
  l.size += n_entry;
637
- update_entries (j, l.size - n_entry, n_entry,
638
- ScopedIds(il, j).get(),
639
- ScopedCodes(il, j).get());
607
+ update_entries(
608
+ j,
609
+ l.size - n_entry,
610
+ n_entry,
611
+ ScopedIds(il, j).get(),
612
+ ScopedCodes(il, j).get());
640
613
  }
641
- assert (l.size == l.capacity);
614
+ assert(l.size == l.capacity);
642
615
  if (verbose) {
643
616
  #pragma omp critical
644
617
  {
@@ -646,72 +619,64 @@ size_t OnDiskInvertedLists::merge_from (const InvertedLists **ils, int n_il,
646
619
  double t1 = getmillisecs();
647
620
  if (t1 - last_t > 500) {
648
621
  printf("merged %zd lists in %.3f s\r",
649
- nmerged, (t1 - t0) / 1000.0);
622
+ nmerged,
623
+ (t1 - t0) / 1000.0);
650
624
  fflush(stdout);
651
625
  last_t = t1;
652
626
  }
653
627
  }
654
628
  }
655
629
  }
656
- if(verbose) {
630
+ if (verbose) {
657
631
  printf("\n");
658
632
  }
659
633
 
660
634
  return ntotal;
661
635
  }
662
636
 
663
-
664
- size_t OnDiskInvertedLists::merge_from_1 (const InvertedLists *ils, bool verbose)
665
- {
666
- return merge_from (&ils, 1, verbose);
637
+ size_t OnDiskInvertedLists::merge_from_1(
638
+ const InvertedLists* ils,
639
+ bool verbose) {
640
+ return merge_from(&ils, 1, verbose);
667
641
  }
668
642
 
669
-
670
- void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
671
- {
643
+ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1) {
672
644
  FAISS_THROW_IF_NOT(0 <= l0 && l0 <= l1 && l1 <= nlist);
673
645
 
674
- std::vector<List> new_lists (l1 - l0);
675
- memcpy (new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
646
+ std::vector<List> new_lists(l1 - l0);
647
+ memcpy(new_lists.data(), &lists[l0], (l1 - l0) * sizeof(List));
676
648
 
677
649
  lists.swap(new_lists);
678
650
 
679
651
  nlist = l1 - l0;
680
652
  }
681
653
 
682
-
683
- void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
684
- {
654
+ void OnDiskInvertedLists::set_all_lists_sizes(const size_t* sizes) {
685
655
  size_t ofs = 0;
686
656
  for (size_t i = 0; i < nlist; i++) {
687
657
  lists[i].offset = ofs;
688
658
  lists[i].capacity = lists[i].size = sizes[i];
689
659
  ofs += sizes[i] * (sizeof(idx_t) + code_size);
690
660
  }
691
-
692
661
  }
693
662
 
694
663
  /*******************************************************
695
664
  * I/O support via callbacks
696
665
  *******************************************************/
697
666
 
698
-
699
-
700
-
701
- OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook():
702
- InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name())
703
- {}
704
-
705
-
706
- void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) const
707
- {
708
- uint32_t h = fourcc ("ilod");
709
- WRITE1 (h);
710
- WRITE1 (ils->nlist);
711
- WRITE1 (ils->code_size);
712
- const OnDiskInvertedLists *od = dynamic_cast<const OnDiskInvertedLists*> (ils);
667
+ OnDiskInvertedListsIOHook::OnDiskInvertedListsIOHook()
668
+ : InvertedListsIOHook("ilod", typeid(OnDiskInvertedLists).name()) {}
669
+
670
+ void OnDiskInvertedListsIOHook::write(const InvertedLists* ils, IOWriter* f)
671
+ const {
672
+ uint32_t h = fourcc("ilod");
673
+ WRITE1(h);
674
+ WRITE1(ils->nlist);
675
+ WRITE1(ils->code_size);
676
+ const OnDiskInvertedLists* od =
677
+ dynamic_cast<const OnDiskInvertedLists*>(ils);
713
678
  // this is a POD object
714
- WRITEVECTOR (od->lists);
679
+ WRITEVECTOR(od->lists);
715
680
 
716
681
  {
717
682
  std::vector<OnDiskInvertedLists::Slot> v(
@@ -725,14 +690,14 @@ void OnDiskInvertedListsIOHook::write(const InvertedLists *ils, IOWriter *f) con
725
690
  WRITE1(od->totsize);
726
691
  }
727
692
 
728
- InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
729
- {
730
- OnDiskInvertedLists *od = new OnDiskInvertedLists();
693
+ InvertedLists* OnDiskInvertedListsIOHook::read(IOReader* f, int io_flags)
694
+ const {
695
+ OnDiskInvertedLists* od = new OnDiskInvertedLists();
731
696
  od->read_only = io_flags & IO_FLAG_READ_ONLY;
732
- READ1 (od->nlist);
733
- READ1 (od->code_size);
697
+ READ1(od->nlist);
698
+ READ1(od->code_size);
734
699
  // this is a POD object
735
- READVECTOR (od->lists);
700
+ READVECTOR(od->lists);
736
701
  {
737
702
  std::vector<OnDiskInvertedLists::Slot> v;
738
703
  READVECTOR(v);
@@ -744,9 +709,10 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
744
709
  od->filename.assign(x.begin(), x.end());
745
710
 
746
711
  if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
747
- FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
748
- FAISS_THROW_IF_NOT_MSG (
749
- reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
712
+ FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
713
+ FAISS_THROW_IF_NOT_MSG(
714
+ reader,
715
+ "IO_FLAG_ONDISK_SAME_DIR only supported "
750
716
  "when reading from file");
751
717
  std::string indexname = reader->name;
752
718
  std::string dirname = "./";
@@ -762,10 +728,10 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
762
728
  filename = dirname + filename;
763
729
  printf("IO_FLAG_ONDISK_SAME_DIR: "
764
730
  "updating ondisk filename from %s to %s\n",
765
- od->filename.c_str(), filename.c_str());
731
+ od->filename.c_str(),
732
+ filename.c_str());
766
733
  od->filename = filename;
767
734
  }
768
-
769
735
  }
770
736
  READ1(od->totsize);
771
737
  if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
@@ -775,53 +741,51 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
775
741
  }
776
742
 
777
743
  /** read from a ArrayInvertedLists into this invertedlist type */
778
- InvertedLists * OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
779
- IOReader *f, int /* io_flags */,
780
- size_t nlist, size_t code_size,
781
- const std::vector<size_t> &sizes) const
782
- {
783
- auto ails = new OnDiskInvertedLists ();
744
+ InvertedLists* OnDiskInvertedListsIOHook::read_ArrayInvertedLists(
745
+ IOReader* f,
746
+ int /* io_flags */,
747
+ size_t nlist,
748
+ size_t code_size,
749
+ const std::vector<size_t>& sizes) const {
750
+ auto ails = new OnDiskInvertedLists();
784
751
  ails->nlist = nlist;
785
752
  ails->code_size = code_size;
786
753
  ails->read_only = true;
787
- ails->lists.resize (nlist);
754
+ ails->lists.resize(nlist);
788
755
 
789
- FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
756
+ FileIOReader* reader = dynamic_cast<FileIOReader*>(f);
790
757
  FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
791
- FILE *fdesc = reader->f;
758
+ FILE* fdesc = reader->f;
792
759
  size_t o0 = ftell(fdesc);
793
760
  size_t o = o0;
794
761
  { // do the mmap
795
762
  struct stat buf;
796
- int ret = fstat (fileno(fdesc), &buf);
797
- FAISS_THROW_IF_NOT_FMT (ret == 0,
798
- "fstat failed: %s", strerror(errno));
763
+ int ret = fstat(fileno(fdesc), &buf);
764
+ FAISS_THROW_IF_NOT_FMT(ret == 0, "fstat failed: %s", strerror(errno));
799
765
  ails->totsize = buf.st_size;
800
- ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
801
- PROT_READ, MAP_SHARED,
802
- fileno(fdesc), 0);
803
- FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
804
- "could not mmap: %s",
805
- strerror(errno));
766
+ ails->ptr = (uint8_t*)mmap(
767
+ nullptr,
768
+ ails->totsize,
769
+ PROT_READ,
770
+ MAP_SHARED,
771
+ fileno(fdesc),
772
+ 0);
773
+ FAISS_THROW_IF_NOT_FMT(
774
+ ails->ptr != MAP_FAILED, "could not mmap: %s", strerror(errno));
806
775
  }
807
776
 
808
777
  FAISS_THROW_IF_NOT(o <= ails->totsize);
809
778
 
810
779
  for (size_t i = 0; i < ails->nlist; i++) {
811
- OnDiskInvertedLists::List & l = ails->lists[i];
780
+ OnDiskInvertedLists::List& l = ails->lists[i];
812
781
  l.size = l.capacity = sizes[i];
813
782
  l.offset = o;
814
- o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
815
- ails->code_size);
783
+ o += l.size * (sizeof(OnDiskInvertedLists::idx_t) + ails->code_size);
816
784
  }
817
785
  // resume normal reading of file
818
- fseek (fdesc, o, SEEK_SET);
786
+ fseek(fdesc, o, SEEK_SET);
819
787
 
820
788
  return ails;
821
789
  }
822
790
 
823
-
824
-
825
-
826
-
827
791
  } // namespace faiss