faiss 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -7,7 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include <faiss/DirectMap.h>
10
+ #include <faiss/invlists/DirectMap.h>
11
11
 
12
12
  #include <cstdio>
13
13
  #include <cassert>
@@ -10,7 +10,7 @@
10
10
  #ifndef FAISS_DIRECT_MAP_H
11
11
  #define FAISS_DIRECT_MAP_H
12
12
 
13
- #include <faiss/InvertedLists.h>
13
+ #include <faiss/invlists/InvertedLists.h>
14
14
  #include <unordered_map>
15
15
 
16
16
 
@@ -7,7 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include <faiss/InvertedLists.h>
10
+ #include <faiss/invlists/InvertedLists.h>
11
11
 
12
12
  #include <cstdio>
13
13
 
@@ -616,6 +616,77 @@ void MaskedInvertedLists::prefetch_lists (
616
616
  il1->prefetch_lists (list1.data(), list1.size());
617
617
  }
618
618
 
619
+ /*****************************************
620
+ * StopWordsInvertedLists implementation
621
+ ******************************************/
622
+
623
+
624
+ StopWordsInvertedLists::StopWordsInvertedLists (
625
+ const InvertedLists *il0, size_t maxsize):
626
+ ReadOnlyInvertedLists (il0->nlist, il0->code_size),
627
+ il0 (il0), maxsize (maxsize)
628
+ {
629
+
630
+ }
631
+
632
+ size_t StopWordsInvertedLists::list_size(size_t list_no) const
633
+ {
634
+ size_t sz = il0->list_size(list_no);
635
+ return sz < maxsize ? sz : 0;
636
+ }
637
+
638
+ const uint8_t * StopWordsInvertedLists::get_codes (size_t list_no) const
639
+ {
640
+ return il0->list_size(list_no) < maxsize ? il0->get_codes(list_no) : nullptr;
641
+ }
642
+
643
+ const idx_t * StopWordsInvertedLists::get_ids (size_t list_no) const
644
+ {
645
+ return il0->list_size(list_no) < maxsize ? il0->get_ids(list_no) : nullptr;
646
+ }
647
+
648
+ void StopWordsInvertedLists::release_codes (
649
+ size_t list_no, const uint8_t *codes) const
650
+ {
651
+ if (il0->list_size (list_no) < maxsize) {
652
+ il0->release_codes (list_no, codes);
653
+ }
654
+ }
655
+
656
+ void StopWordsInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
657
+ {
658
+ if (il0->list_size (list_no) < maxsize) {
659
+ il0->release_ids (list_no, ids);
660
+ }
661
+ }
662
+
663
+ idx_t StopWordsInvertedLists::get_single_id (size_t list_no, size_t offset) const
664
+ {
665
+ FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
666
+ return il0->get_single_id (list_no, offset);
667
+ }
668
+
669
+ const uint8_t * StopWordsInvertedLists::get_single_code (
670
+ size_t list_no, size_t offset) const
671
+ {
672
+ FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
673
+ return il0->get_single_code (list_no, offset);
674
+ }
675
+
676
+ void StopWordsInvertedLists::prefetch_lists (
677
+ const idx_t *list_nos, int nlist) const
678
+ {
679
+ std::vector<idx_t> list0;
680
+ for (int i = 0; i < nlist; i++) {
681
+ idx_t list_no = list_nos[i];
682
+ if (list_no < 0) continue;
683
+ if (il0->list_size(list_no) < maxsize) {
684
+ list0.push_back(list_no);
685
+ }
686
+ }
687
+ il0->prefetch_lists (list0.data(), list0.size());
688
+ }
689
+
619
690
 
620
691
 
621
692
  } // namespace faiss
@@ -36,6 +36,10 @@ struct InvertedLists {
36
36
 
37
37
  InvertedLists (size_t nlist, size_t code_size);
38
38
 
39
+ /// used for BlockInvertedLists, where the codes are packed into groups
40
+ /// and the individual code size is meaningless
41
+ static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1);
42
+
39
43
  /*************************
40
44
  * Read only functions */
41
45
 
@@ -198,7 +202,7 @@ struct ArrayInvertedLists: InvertedLists {
198
202
 
199
203
  void resize (size_t list_no, size_t new_size) override;
200
204
 
201
- virtual ~ArrayInvertedLists ();
205
+ ~ArrayInvertedLists () override;
202
206
  };
203
207
 
204
208
  /*****************************************************************
@@ -329,6 +333,33 @@ struct MaskedInvertedLists: ReadOnlyInvertedLists {
329
333
 
330
334
  };
331
335
 
336
+
337
+ /** if the inverted list in il is smaller than maxsize then return it,
338
+ * otherwise return an empty invlist */
339
+ struct StopWordsInvertedLists: ReadOnlyInvertedLists {
340
+
341
+ const InvertedLists *il0;
342
+ size_t maxsize;
343
+
344
+ StopWordsInvertedLists (const InvertedLists *il, size_t maxsize);
345
+
346
+ size_t list_size(size_t list_no) const override;
347
+ const uint8_t * get_codes (size_t list_no) const override;
348
+ const idx_t * get_ids (size_t list_no) const override;
349
+
350
+ void release_codes (size_t list_no, const uint8_t *codes) const override;
351
+ void release_ids (size_t list_no, const idx_t *ids) const override;
352
+
353
+ idx_t get_single_id (size_t list_no, size_t offset) const override;
354
+
355
+ const uint8_t * get_single_code (
356
+ size_t list_no, size_t offset) const override;
357
+
358
+ void prefetch_lists (const idx_t *list_nos, int nlist) const override;
359
+
360
+ };
361
+
362
+
332
363
  } // namespace faiss
333
364
 
334
365
 
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/invlists/InvertedListsIOHook.h>
9
+
10
+ #include <faiss/impl/io.h>
11
+ #include <faiss/impl/io_macros.h>
12
+ #include <faiss/impl/FaissAssert.h>
13
+
14
+ #include <faiss/invlists/BlockInvertedLists.h>
15
+
16
+ #ifndef _MSC_VER
17
+ #include <faiss/invlists/OnDiskInvertedLists.h>
18
+ #endif // !_MSC_VER
19
+
20
+
21
+ namespace faiss {
22
+
23
+
24
+ /**********************************************************
25
+ * InvertedListsIOHook implementation
26
+ **********************************************************/
27
+
28
+ InvertedListsIOHook::InvertedListsIOHook(
29
+ const std::string & key, const std::string & classname):
30
+ key(key), classname(classname)
31
+ {}
32
+
33
+ namespace {
34
+
35
+ /// std::vector that deletes its contents
36
+ struct IOHookTable: std::vector<InvertedListsIOHook*> {
37
+
38
+ IOHookTable() {
39
+ #ifndef _MSC_VER
40
+ push_back(new OnDiskInvertedListsIOHook());
41
+ #endif
42
+ push_back(new BlockInvertedListsIOHook());
43
+ }
44
+
45
+ ~IOHookTable() {
46
+ for (auto x: *this) {
47
+ delete x;
48
+ }
49
+ }
50
+ };
51
+
52
+ static IOHookTable InvertedListsIOHook_table;
53
+
54
+ } // anonymous namespace
55
+
56
+ InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
57
+ {
58
+ for(const auto & callback: InvertedListsIOHook_table) {
59
+ if (h == fourcc(callback->key)) {
60
+ return callback;
61
+ }
62
+ }
63
+ FAISS_THROW_FMT (
64
+ "read_InvertedLists: could not load ArrayInvertedLists as "
65
+ "%08x (\"%s\")", h, fourcc_inv_printable(h).c_str()
66
+ );
67
+ }
68
+
69
+ InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
70
+ {
71
+ for(const auto & callback: InvertedListsIOHook_table) {
72
+ if (callback->classname == classname) {
73
+ return callback;
74
+ }
75
+ }
76
+ FAISS_THROW_FMT (
77
+ "read_InvertedLists: could not find classname %s",
78
+ classname.c_str()
79
+ );
80
+ }
81
+
82
+ void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
83
+ {
84
+ InvertedListsIOHook_table.push_back(cb);
85
+ }
86
+
87
+ void InvertedListsIOHook::print_callbacks()
88
+ {
89
+ printf("registered %zd InvertedListsIOHooks:\n",
90
+ InvertedListsIOHook_table.size());
91
+ for(const auto & cb: InvertedListsIOHook_table) {
92
+ printf("%08x %s %s\n",
93
+ fourcc(cb->key.c_str()),
94
+ cb->key.c_str(),
95
+ cb->classname.c_str());
96
+ }
97
+ }
98
+
99
+ InvertedLists * InvertedListsIOHook::read_ArrayInvertedLists(
100
+ IOReader *, int ,
101
+ size_t , size_t ,
102
+ const std::vector<size_t> &) const
103
+ {
104
+ FAISS_THROW_FMT("read to array not implemented for %s", classname.c_str());
105
+ }
106
+
107
+ } // namespace faiss
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <string>
11
+ #include <faiss/invlists/InvertedLists.h>
12
+ #include <faiss/impl/io.h>
13
+
14
+
15
+ namespace faiss {
16
+
17
+
18
+ /** Callbacks to handle other types of InvertedList objects.
19
+ *
20
+ * The callbacks should be registered with add_callback before calling
21
+ * read_index or read_InvertedLists. The callbacks for
22
+ * OnDiskInvertedLists are registered by default. The invlist type is
23
+ * identified by:
24
+ *
25
+ * - the key (a fourcc) at read time
26
+ * - the class name (as given by typeid.name) at write time
27
+ */
28
+ struct InvertedListsIOHook {
29
+ const std::string key; ///< string version of the fourcc
30
+ const std::string classname; ///< typeid.name
31
+
32
+ InvertedListsIOHook(const std::string & key, const std::string & classname);
33
+
34
+ /// write the inverted lists to the IOWriter (including the fourcc)
35
+ virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
36
+
37
+ /// called when the fourcc matches this class's fourcc
38
+ virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
39
+
40
+ /** read from an ArrayInvertedLists into this invertedlist type.
41
+ * For this to work, the callback has to be enabled and the io_flag has to be set to
42
+ * IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
43
+ *
44
+ * (default implementation fails)
45
+ */
46
+ virtual InvertedLists * read_ArrayInvertedLists(
47
+ IOReader *f, int io_flags,
48
+ size_t nlist, size_t code_size,
49
+ const std::vector<size_t> &sizes) const;
50
+
51
+ virtual ~InvertedListsIOHook() {}
52
+
53
+ /**************************** Manage the set of callbacks ******/
54
+
55
+ // transfers ownership
56
+ static void add_callback(InvertedListsIOHook *);
57
+ static void print_callbacks();
58
+ static InvertedListsIOHook* lookup(int h);
59
+ static InvertedListsIOHook* lookup_classname(const std::string & classname);
60
+
61
+ };
62
+
63
+ } // namespace faiss
@@ -7,7 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include <faiss/OnDiskInvertedLists.h>
10
+ #include <faiss/invlists/OnDiskInvertedLists.h>
11
11
 
12
12
  #include <pthread.h>
13
13
 
@@ -130,7 +130,8 @@ struct LockLevels {
130
130
 
131
131
  void print () {
132
132
  pthread_mutex_lock(&mutex1);
133
- printf("State: level3_in_use=%d n_level2=%d level1_holders: [", level3_in_use, n_level2);
133
+ printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
134
+ int(level3_in_use), n_level2);
134
135
  for (int k : level1_holders) {
135
136
  printf("%d ", k);
136
137
  }
@@ -299,8 +300,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
299
300
  // unmap file
300
301
  if (ptr != nullptr) {
301
302
  int err = munmap (ptr, totsize);
302
- FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
303
- strerror(errno));
303
+ FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s", strerror(errno));
304
304
  }
305
305
  if (totsize == 0) {
306
306
  // must create file before truncating it
@@ -516,8 +516,9 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
516
516
  if (it == slots.end()) {
517
517
  // not enough capacity
518
518
  size_t new_size = totsize == 0 ? 32 : totsize * 2;
519
- while (new_size - totsize < capacity)
519
+ while (new_size - totsize < capacity) {
520
520
  new_size *= 2;
521
+ }
521
522
  locks->lock_3 ();
522
523
  update_totsize(new_size);
523
524
  locks->unlock_3 ();
@@ -678,6 +679,18 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
678
679
  nlist = l1 - l0;
679
680
  }
680
681
 
682
+
683
+ void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
684
+ {
685
+ size_t ofs = 0;
686
+ for (size_t i = 0; i < nlist; i++) {
687
+ lists[i].offset = ofs;
688
+ lists[i].capacity = lists[i].size = sizes[i];
689
+ ofs += sizes[i] * (sizeof(idx_t) + code_size);
690
+ }
691
+
692
+ }
693
+
681
694
  /*******************************************************
682
695
  * I/O support via callbacks
683
696
  *******************************************************/
@@ -755,7 +768,9 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
755
768
 
756
769
  }
757
770
  READ1(od->totsize);
758
- od->do_mmap();
771
+ if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
772
+ od->do_mmap();
773
+ }
759
774
  return od;
760
775
  }
761
776
 
@@ -15,7 +15,7 @@
15
15
  #include <typeinfo>
16
16
 
17
17
  #include <faiss/IndexIVF.h>
18
-
18
+ #include <faiss/invlists/InvertedListsIOHook.h>
19
19
  #include <faiss/index_io.h>
20
20
 
21
21
  namespace faiss {
@@ -109,7 +109,7 @@ struct OnDiskInvertedLists: InvertedLists {
109
109
 
110
110
  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
111
111
 
112
- virtual ~OnDiskInvertedLists ();
112
+ ~OnDiskInvertedLists () override;
113
113
 
114
114
  // private
115
115
 
@@ -126,6 +126,9 @@ struct OnDiskInvertedLists: InvertedLists {
126
126
  size_t allocate_slot (size_t capacity);
127
127
  void free_slot (size_t offset, size_t capacity);
128
128
 
129
+ /// override all list sizes and make a packed storage
130
+ void set_all_lists_sizes(const size_t *sizes);
131
+
129
132
  // empty constructor for the I/O functions
130
133
  OnDiskInvertedLists ();
131
134
  };
@@ -1,8 +1,15 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
1
8
  #pragma once
2
9
 
3
10
  #include "Python.h"
4
11
  #include <faiss/impl/io.h>
5
- #include <faiss/InvertedLists.h>
12
+ #include <faiss/invlists/InvertedLists.h>
6
13
 
7
14
  // all callbacks have to acquire the GIL on input
8
15