faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -39,8 +39,12 @@ namespace faiss {
39
39
  * that hides the template mess.
40
40
  ********************************************************************/
41
41
 
42
- #if defined(__F16C__) && defined(__AVX2__)
42
+ #ifdef __AVX2__
43
+ #ifdef __F16C__
43
44
  #define USE_F16C
45
+ #else
46
+ #warning "Cannot enable AVX optimizations in scalar quantizer if -mf16c is not set as well"
47
+ #endif
44
48
  #endif
45
49
 
46
50
 
@@ -1220,33 +1224,41 @@ SQDistanceComputer *select_distance_computer (
1220
1224
 
1221
1225
  ScalarQuantizer::ScalarQuantizer
1222
1226
  (size_t d, QuantizerType qtype):
1223
- qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d)
1227
+ qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d(d)
1228
+ {
1229
+ set_derived_sizes();
1230
+ }
1231
+
1232
+ ScalarQuantizer::ScalarQuantizer ():
1233
+ qtype(QT_8bit),
1234
+ rangestat(RS_minmax), rangestat_arg(0), d(0), bits(0), code_size(0)
1235
+ {}
1236
+
1237
+ void ScalarQuantizer::set_derived_sizes ()
1224
1238
  {
1225
1239
  switch (qtype) {
1226
- case QT_8bit:
1227
- case QT_8bit_uniform:
1228
- case QT_8bit_direct:
1240
+ case QT_8bit:
1241
+ case QT_8bit_uniform:
1242
+ case QT_8bit_direct:
1229
1243
  code_size = d;
1244
+ bits = 8;
1230
1245
  break;
1231
- case QT_4bit:
1232
- case QT_4bit_uniform:
1246
+ case QT_4bit:
1247
+ case QT_4bit_uniform:
1233
1248
  code_size = (d + 1) / 2;
1249
+ bits = 4;
1234
1250
  break;
1235
- case QT_6bit:
1251
+ case QT_6bit:
1236
1252
  code_size = (d * 6 + 7) / 8;
1253
+ bits = 6;
1237
1254
  break;
1238
- case QT_fp16:
1255
+ case QT_fp16:
1239
1256
  code_size = d * 2;
1257
+ bits = 16;
1240
1258
  break;
1241
1259
  }
1242
-
1243
1260
  }
1244
1261
 
1245
- ScalarQuantizer::ScalarQuantizer ():
1246
- qtype(QT_8bit),
1247
- rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
1248
- {}
1249
-
1250
1262
  void ScalarQuantizer::train (size_t n, const float *x)
1251
1263
  {
1252
1264
  int bit_per_dim =
@@ -1418,9 +1430,8 @@ struct IVFSQScannerIP: InvertedListScanner {
1418
1430
  float accu = accu0 + dc.query_to_code (codes);
1419
1431
 
1420
1432
  if (accu > simi [0]) {
1421
- minheap_pop (k, simi, idxi);
1422
1433
  int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
1423
- minheap_push (k, simi, idxi, accu, id);
1434
+ minheap_replace_top (k, simi, idxi, accu, id);
1424
1435
  nup++;
1425
1436
  }
1426
1437
  codes += code_size;
@@ -1506,9 +1517,8 @@ struct IVFSQScannerL2: InvertedListScanner {
1506
1517
  float dis = dc.query_to_code (codes);
1507
1518
 
1508
1519
  if (dis < simi [0]) {
1509
- maxheap_pop (k, simi, idxi);
1510
1520
  int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
1511
- maxheap_push (k, simi, idxi, dis, id);
1521
+ maxheap_replace_top (k, simi, idxi, dis, id);
1512
1522
  nup++;
1513
1523
  }
1514
1524
  codes += code_size;
@@ -53,6 +53,9 @@ struct ScalarQuantizer {
53
53
  /// dimension of input vectors
54
54
  size_t d;
55
55
 
56
+ /// bits per scalar code
57
+ size_t bits;
58
+
56
59
  /// bytes per vector
57
60
  size_t code_size;
58
61
 
@@ -62,6 +65,9 @@ struct ScalarQuantizer {
62
65
  ScalarQuantizer (size_t d, QuantizerType qtype);
63
66
  ScalarQuantizer ();
64
67
 
68
+ /// updates internal values based on qtype and d
69
+ void set_derived_sizes ();
70
+
65
71
  void train (size_t n, const float *x);
66
72
 
67
73
  /// Used by an IVF index to train based on the residuals
@@ -15,15 +15,13 @@
15
15
  #include <sys/types.h>
16
16
  #include <sys/stat.h>
17
17
 
18
- #ifndef _MSC_VER
19
- #include <sys/mman.h>
20
- #endif // !_MSC_VER
21
-
22
18
  #include <faiss/impl/FaissAssert.h>
23
19
  #include <faiss/impl/io.h>
24
20
  #include <faiss/impl/io_macros.h>
25
21
  #include <faiss/utils/hamming.h>
26
22
 
23
+ #include <faiss/invlists/InvertedListsIOHook.h>
24
+
27
25
  #include <faiss/IndexFlat.h>
28
26
  #include <faiss/VectorTransform.h>
29
27
  #include <faiss/IndexPreTransform.h>
@@ -39,17 +37,16 @@
39
37
  #include <faiss/IndexScalarQuantizer.h>
40
38
  #include <faiss/IndexHNSW.h>
41
39
  #include <faiss/IndexLattice.h>
40
+ #include <faiss/IndexPQFastScan.h>
41
+ #include <faiss/IndexIVFPQFastScan.h>
42
+ #include <faiss/IndexRefine.h>
43
+
42
44
  #include <faiss/IndexBinaryFlat.h>
43
45
  #include <faiss/IndexBinaryFromFloat.h>
44
46
  #include <faiss/IndexBinaryHNSW.h>
45
47
  #include <faiss/IndexBinaryIVF.h>
46
48
  #include <faiss/IndexBinaryHash.h>
47
49
 
48
- #ifndef _MSC_VER
49
- #include <faiss/OnDiskInvertedLists.h>
50
- #endif // !_MSC_VER
51
-
52
-
53
50
  namespace faiss {
54
51
 
55
52
 
@@ -141,7 +138,10 @@ VectorTransform* read_VectorTransform (IOReader *f) {
141
138
  }
142
139
  vt = itqt;
143
140
  } else {
144
- FAISS_THROW_MSG("fourcc not recognized");
141
+ FAISS_THROW_FMT(
142
+ "fourcc %ud (\"%s\") not recognized",
143
+ h, fourcc_inv_printable(h).c_str()
144
+ );
145
145
  }
146
146
  READ1 (vt->d_in);
147
147
  READ1 (vt->d_out);
@@ -167,7 +167,10 @@ static void read_ArrayInvertedLists_sizes (
167
167
  sizes[idsizes[j]] = idsizes[j + 1];
168
168
  }
169
169
  } else {
170
- FAISS_THROW_MSG ("invalid list_type");
170
+ FAISS_THROW_FMT(
171
+ "list_type %ud (\"%s\") not recognized",
172
+ list_type, fourcc_inv_printable(list_type).c_str()
173
+ );
171
174
  }
172
175
  }
173
176
 
@@ -199,11 +202,6 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
199
202
  }
200
203
  return ails;
201
204
 
202
- #ifdef _MSC_VER
203
- } else {
204
- FAISS_THROW_MSG("Unsupported inverted list format for Windows");
205
- }
206
- #else
207
205
  } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
208
206
  // code is always ilxx where xx is specific to the type of invlists we want
209
207
  // so we get the 16 high bits from the io_flag and the 16 low bits as "il"
@@ -218,7 +216,6 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
218
216
  } else {
219
217
  return InvertedListsIOHook::lookup(h)->read(f, io_flags);
220
218
  }
221
- #endif // !_MSC_VER
222
219
 
223
220
  }
224
221
 
@@ -226,8 +223,11 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
226
223
  static void read_InvertedLists (
227
224
  IndexIVF *ivf, IOReader *f, int io_flags) {
228
225
  InvertedLists *ils = read_InvertedLists (f, io_flags);
229
- FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
230
- ils->code_size == ivf->code_size));
226
+ if (ils) {
227
+ FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
228
+ FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
229
+ ils->code_size == ivf->code_size);
230
+ }
231
231
  ivf->invlists = ils;
232
232
  ivf->own_invlists = true;
233
233
  }
@@ -247,6 +247,7 @@ static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
247
247
  READ1 (ivsc->d);
248
248
  READ1 (ivsc->code_size);
249
249
  READVECTOR (ivsc->trained);
250
+ ivsc->set_derived_sizes ();
250
251
  }
251
252
 
252
253
 
@@ -551,14 +552,20 @@ Index *read_index (IOReader *f, int io_flags) {
551
552
  read_ProductQuantizer (&imiq->pq, f);
552
553
  idx = imiq;
553
554
  } else if(h == fourcc ("IxRF")) {
554
- IndexRefineFlat *idxrf = new IndexRefineFlat ();
555
+ IndexRefine *idxrf = new IndexRefine ();
555
556
  read_index_header (idxrf, f);
556
557
  idxrf->base_index = read_index(f, io_flags);
557
- idxrf->own_fields = true;
558
- IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f, io_flags));
559
- std::swap (*rf, idxrf->refine_index);
560
- delete rf;
558
+ idxrf->refine_index = read_index(f, io_flags);
561
559
  READ1 (idxrf->k_factor);
560
+ if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
561
+ // then make a RefineFlat with it
562
+ IndexRefine *idxrf_old = idxrf;
563
+ idxrf = new IndexRefineFlat();
564
+ *idxrf = *idxrf_old;
565
+ delete idxrf_old;
566
+ }
567
+ idxrf->own_fields = true;
568
+ idxrf->own_refine_index = true;
562
569
  idx = idxrf;
563
570
  } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
564
571
  bool is_map2 = h == fourcc ("IxM2");
@@ -598,8 +605,36 @@ Index *read_index (IOReader *f, int io_flags) {
598
605
  dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
599
606
  }
600
607
  idx = idxhnsw;
608
+ } else if(h == fourcc("IPfs")) {
609
+ IndexPQFastScan *idxpqfs = new IndexPQFastScan();
610
+ read_index_header (idxpqfs, f);
611
+ read_ProductQuantizer (&idxpqfs->pq, f);
612
+ READ1 (idxpqfs->implem);
613
+ READ1 (idxpqfs->bbs);
614
+ READ1 (idxpqfs->qbs);
615
+ READ1 (idxpqfs->ntotal2);
616
+ READ1 (idxpqfs->M2);
617
+ READVECTOR (idxpqfs->codes);
618
+ idx = idxpqfs;
619
+
620
+ } else if (h == fourcc("IwPf")) {
621
+ IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
622
+ read_ivf_header (ivpq, f);
623
+ READ1 (ivpq->by_residual);
624
+ READ1 (ivpq->code_size);
625
+ READ1 (ivpq->bbs);
626
+ READ1 (ivpq->M2);
627
+ READ1 (ivpq->implem);
628
+ READ1 (ivpq->qbs2);
629
+ read_ProductQuantizer (&ivpq->pq, f);
630
+ read_InvertedLists (ivpq, f, io_flags);
631
+ ivpq->precompute_table();
632
+ idx = ivpq;
601
633
  } else {
602
- FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
634
+ FAISS_THROW_FMT(
635
+ "Index type 0x%08x (\"%s\") not recognized",
636
+ h, fourcc_inv_printable(h).c_str()
637
+ );
603
638
  idx = nullptr;
604
639
  }
605
640
  return idx;
@@ -780,7 +815,10 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
780
815
  }
781
816
  idx = idxmh;
782
817
  } else {
783
- FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
818
+ FAISS_THROW_FMT(
819
+ "Index type %08x (\"%s\") not recognized",
820
+ h, fourcc_inv_printable(h).c_str()
821
+ );
784
822
  idx = nullptr;
785
823
  }
786
824
  return idx;
@@ -797,76 +835,6 @@ IndexBinary *read_index_binary (const char *fname, int io_flags) {
797
835
  return idx;
798
836
  }
799
837
 
800
- #ifndef _MSC_VER
801
-
802
- /**********************************************************
803
- * InvertedListIOHook's
804
- **********************************************************/
805
-
806
- InvertedListsIOHook::InvertedListsIOHook(
807
- const std::string & key, const std::string & classname):
808
- key(key), classname(classname)
809
- {}
810
-
811
- namespace {
812
-
813
- /// std::vector that deletes its contents
814
- struct IOHookTable: std::vector<InvertedListsIOHook*> {
815
-
816
- IOHookTable() {
817
- push_back(new OnDiskInvertedListsIOHook());
818
- }
819
-
820
- ~IOHookTable() {
821
- for (auto x: *this) {
822
- delete x;
823
- }
824
- }
825
- };
826
-
827
- static IOHookTable InvertedListsIOHook_table;
828
-
829
- } // anonymous namepsace
830
-
831
- InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
832
- {
833
- for(const auto & callback: InvertedListsIOHook_table) {
834
- if (h == fourcc(callback->key)) {
835
- return callback;
836
- }
837
- }
838
- FAISS_THROW_FMT ("read_InvertedLists: could not load ArrayInvertedLists as %04x", h);
839
- }
840
-
841
- InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
842
- {
843
- for(const auto & callback: InvertedListsIOHook_table) {
844
- if (callback->classname == classname) {
845
- return callback;
846
- }
847
- }
848
- FAISS_THROW_FMT ("read_InvertedLists: could not find classname %s", classname.c_str());
849
- }
850
-
851
- void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
852
- {
853
- InvertedListsIOHook_table.push_back(cb);
854
- }
855
-
856
- void InvertedListsIOHook::print_callbacks()
857
- {
858
- printf("registered %zd InvertedListsIOHooks:\n",
859
- InvertedListsIOHook_table.size());
860
- for(const auto & cb: InvertedListsIOHook_table) {
861
- printf("%08x %s %s\n",
862
- fourcc(cb->key.c_str()),
863
- cb->key.c_str(),
864
- cb->classname.c_str());
865
- }
866
- }
867
-
868
- #endif // !_MSC_VER
869
-
870
838
 
871
839
 
872
840
  } // namespace faiss
@@ -15,9 +15,7 @@
15
15
  #include <sys/types.h>
16
16
  #include <sys/stat.h>
17
17
 
18
- #ifndef _MSC_VER
19
- #include <sys/mman.h>
20
- #endif // !_MSC_VER
18
+ #include <faiss/invlists/InvertedListsIOHook.h>
21
19
 
22
20
  #include <faiss/impl/FaissAssert.h>
23
21
  #include <faiss/impl/io.h>
@@ -39,6 +37,9 @@
39
37
  #include <faiss/IndexScalarQuantizer.h>
40
38
  #include <faiss/IndexHNSW.h>
41
39
  #include <faiss/IndexLattice.h>
40
+ #include <faiss/IndexPQFastScan.h>
41
+ #include <faiss/IndexIVFPQFastScan.h>
42
+ #include <faiss/IndexRefine.h>
42
43
 
43
44
  #include <faiss/IndexBinaryFlat.h>
44
45
  #include <faiss/IndexBinaryFromFloat.h>
@@ -46,11 +47,6 @@
46
47
  #include <faiss/IndexBinaryIVF.h>
47
48
  #include <faiss/IndexBinaryHash.h>
48
49
 
49
- #ifndef _MSC_VER
50
- #include <faiss/OnDiskInvertedLists.h>
51
- #endif // !_MSC_VER
52
-
53
-
54
50
  /*************************************************************
55
51
  * The I/O format is the content of the class. For objects that are
56
52
  * inherited, like Index, a 4-character-code (fourcc) indicates which
@@ -66,9 +62,6 @@
66
62
  * or deprecated fields), the fourcc can be replaced. New code should
67
63
  * be able to read the old fourcc and fill in new classes.
68
64
  *
69
- * TODO: serialization to strings for use in Python pickle or Torch
70
- * serialization.
71
- *
72
65
  * TODO: in this file, the read functions that encounter errors may
73
66
  * leak memory.
74
67
  **************************************************************/
@@ -216,19 +209,10 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
216
209
  WRITEANDCHECK (ails->ids[i].data(), n);
217
210
  }
218
211
  }
219
- #ifndef _MSC_VER
220
- } else {
221
212
 
213
+ } else {
222
214
  InvertedListsIOHook::lookup_classname(
223
215
  typeid(*ils).name())->write(ils, f);
224
-
225
- /*
226
- fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
227
- "saving null invlist\n");
228
- uint32_t h = fourcc ("il00");
229
- WRITE1 (h);
230
- */
231
- #endif // !_MSC_VER
232
216
  }
233
217
  }
234
218
 
@@ -409,13 +393,13 @@ void write_index (const Index *idx, IOWriter *f) {
409
393
  WRITE1 (h);
410
394
  write_index_header (imiq, f);
411
395
  write_ProductQuantizer (&imiq->pq, f);
412
- } else if(const IndexRefineFlat * idxrf =
413
- dynamic_cast<const IndexRefineFlat *> (idx)) {
396
+ } else if(const IndexRefine * idxrf =
397
+ dynamic_cast<const IndexRefine *> (idx)) {
414
398
  uint32_t h = fourcc ("IxRF");
415
399
  WRITE1 (h);
416
400
  write_index_header (idxrf, f);
417
401
  write_index (idxrf->base_index, f);
418
- write_index (&idxrf->refine_index, f);
402
+ write_index (idxrf->refine_index, f);
419
403
  WRITE1 (idxrf->k_factor);
420
404
  } else if(const IndexIDMap * idxmap =
421
405
  dynamic_cast<const IndexIDMap *> (idx)) {
@@ -440,8 +424,33 @@ void write_index (const Index *idx, IOWriter *f) {
440
424
  write_index_header (idxhnsw, f);
441
425
  write_HNSW (&idxhnsw->hnsw, f);
442
426
  write_index (idxhnsw->storage, f);
427
+ } else if (const IndexPQFastScan *idxpqfs =
428
+ dynamic_cast<const IndexPQFastScan*>(idx)) {
429
+ uint32_t h = fourcc("IPfs");
430
+ WRITE1 (h);
431
+ write_index_header (idxpqfs, f);
432
+ write_ProductQuantizer (&idxpqfs->pq, f);
433
+ WRITE1 (idxpqfs->implem);
434
+ WRITE1 (idxpqfs->bbs);
435
+ WRITE1 (idxpqfs->qbs);
436
+ WRITE1 (idxpqfs->ntotal2);
437
+ WRITE1 (idxpqfs->M2);
438
+ WRITEVECTOR (idxpqfs->codes);
439
+ } else if (const IndexIVFPQFastScan * ivpq =
440
+ dynamic_cast<const IndexIVFPQFastScan *> (idx)) {
441
+ uint32_t h = fourcc ("IwPf");
442
+ WRITE1 (h);
443
+ write_ivf_header (ivpq, f);
444
+ WRITE1 (ivpq->by_residual);
445
+ WRITE1 (ivpq->code_size);
446
+ WRITE1 (ivpq->bbs);
447
+ WRITE1 (ivpq->M2);
448
+ WRITE1 (ivpq->implem);
449
+ WRITE1 (ivpq->qbs2);
450
+ write_ProductQuantizer (&ivpq->pq, f);
451
+ write_InvertedLists (ivpq->invlists, f);
443
452
  } else {
444
- FAISS_THROW_MSG ("don't know how to serialize this type of index");
453
+ FAISS_THROW_MSG ("don't know how to serialize this type of index");
445
454
  }
446
455
  }
447
456