faiss 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +16 -4
  5. data/ext/faiss/ext.cpp +12 -308
  6. data/ext/faiss/extconf.rb +6 -3
  7. data/ext/faiss/index.cpp +189 -0
  8. data/ext/faiss/index_binary.cpp +75 -0
  9. data/ext/faiss/kmeans.cpp +40 -0
  10. data/ext/faiss/numo.hpp +867 -0
  11. data/ext/faiss/pca_matrix.cpp +33 -0
  12. data/ext/faiss/product_quantizer.cpp +53 -0
  13. data/ext/faiss/utils.cpp +13 -0
  14. data/ext/faiss/utils.h +5 -0
  15. data/lib/faiss.rb +0 -5
  16. data/lib/faiss/version.rb +1 -1
  17. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  18. data/vendor/faiss/faiss/AutoTune.h +6 -3
  19. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  20. data/vendor/faiss/faiss/Index.cpp +3 -4
  21. data/vendor/faiss/faiss/Index.h +3 -3
  22. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  23. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  24. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  25. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  26. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  27. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  28. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  29. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  30. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  31. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  32. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  33. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  34. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  36. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  37. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  38. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  39. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  41. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  42. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  43. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  44. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  45. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  46. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  47. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  48. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  49. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  50. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  51. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  52. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  53. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  54. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  55. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  56. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  57. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  58. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  59. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  60. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  61. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  62. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  63. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  64. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  66. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  67. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  68. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  69. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  70. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  71. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  72. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  73. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  74. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  75. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  76. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  77. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  78. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  79. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  80. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  81. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  82. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  83. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  84. data/vendor/faiss/faiss/impl/io.h +7 -2
  85. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  86. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  87. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  88. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  89. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  90. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  91. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  92. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  93. data/vendor/faiss/faiss/index_io.h +1 -48
  94. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  95. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  96. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  97. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  98. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  99. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  100. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  101. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  102. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  103. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  104. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  105. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  106. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  107. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  108. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  109. data/vendor/faiss/faiss/utils/distances.h +28 -20
  110. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  111. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  112. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  113. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  114. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  115. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  116. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  117. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  118. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  119. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  120. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  121. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  122. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  123. metadata +54 -149
  124. data/lib/faiss/index.rb +0 -20
  125. data/lib/faiss/index_binary.rb +0 -20
  126. data/lib/faiss/kmeans.rb +0 -15
  127. data/lib/faiss/pca_matrix.rb +0 -15
  128. data/lib/faiss/product_quantizer.rb +0 -22
  129. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  130. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  131. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  132. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  133. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  134. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  135. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  136. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  137. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  138. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  139. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  140. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  141. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  142. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  143. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  144. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  145. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  146. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  147. data/vendor/faiss/c_api/Index_c.h +0 -183
  148. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  149. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  150. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  151. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  152. data/vendor/faiss/c_api/error_c.h +0 -42
  153. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  154. data/vendor/faiss/c_api/error_impl.h +0 -16
  155. data/vendor/faiss/c_api/faiss_c.h +0 -58
  156. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  157. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  158. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  159. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  160. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  161. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  162. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  163. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  164. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  165. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  166. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  167. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  168. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  169. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  170. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  171. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  172. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  173. data/vendor/faiss/c_api/index_io_c.h +0 -50
  174. data/vendor/faiss/c_api/macros_impl.h +0 -110
  175. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  176. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  177. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  178. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  179. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  180. data/vendor/faiss/misc/test_blas.cpp +0 -87
  181. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  182. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  183. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  184. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  185. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  186. data/vendor/faiss/tests/test_merge.cpp +0 -260
  187. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  188. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  189. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  190. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  191. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  192. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  193. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  194. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  195. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  196. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  197. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  198. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  199. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -7,7 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include <faiss/DirectMap.h>
10
+ #include <faiss/invlists/DirectMap.h>
11
11
 
12
12
  #include <cstdio>
13
13
  #include <cassert>
@@ -10,7 +10,7 @@
10
10
  #ifndef FAISS_DIRECT_MAP_H
11
11
  #define FAISS_DIRECT_MAP_H
12
12
 
13
- #include <faiss/InvertedLists.h>
13
+ #include <faiss/invlists/InvertedLists.h>
14
14
  #include <unordered_map>
15
15
 
16
16
 
@@ -7,7 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include <faiss/InvertedLists.h>
10
+ #include <faiss/invlists/InvertedLists.h>
11
11
 
12
12
  #include <cstdio>
13
13
 
@@ -616,6 +616,77 @@ void MaskedInvertedLists::prefetch_lists (
616
616
  il1->prefetch_lists (list1.data(), list1.size());
617
617
  }
618
618
 
619
+ /*****************************************
620
+ * StopWordsInvertedLists implementation
621
+ ******************************************/
622
+
623
+
624
+ StopWordsInvertedLists::StopWordsInvertedLists (
625
+ const InvertedLists *il0, size_t maxsize):
626
+ ReadOnlyInvertedLists (il0->nlist, il0->code_size),
627
+ il0 (il0), maxsize (maxsize)
628
+ {
629
+
630
+ }
631
+
632
+ size_t StopWordsInvertedLists::list_size(size_t list_no) const
633
+ {
634
+ size_t sz = il0->list_size(list_no);
635
+ return sz < maxsize ? sz : 0;
636
+ }
637
+
638
+ const uint8_t * StopWordsInvertedLists::get_codes (size_t list_no) const
639
+ {
640
+ return il0->list_size(list_no) < maxsize ? il0->get_codes(list_no) : nullptr;
641
+ }
642
+
643
+ const idx_t * StopWordsInvertedLists::get_ids (size_t list_no) const
644
+ {
645
+ return il0->list_size(list_no) < maxsize ? il0->get_ids(list_no) : nullptr;
646
+ }
647
+
648
+ void StopWordsInvertedLists::release_codes (
649
+ size_t list_no, const uint8_t *codes) const
650
+ {
651
+ if (il0->list_size (list_no) < maxsize) {
652
+ il0->release_codes (list_no, codes);
653
+ }
654
+ }
655
+
656
+ void StopWordsInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
657
+ {
658
+ if (il0->list_size (list_no) < maxsize) {
659
+ il0->release_ids (list_no, ids);
660
+ }
661
+ }
662
+
663
+ idx_t StopWordsInvertedLists::get_single_id (size_t list_no, size_t offset) const
664
+ {
665
+ FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
666
+ return il0->get_single_id (list_no, offset);
667
+ }
668
+
669
+ const uint8_t * StopWordsInvertedLists::get_single_code (
670
+ size_t list_no, size_t offset) const
671
+ {
672
+ FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
673
+ return il0->get_single_code (list_no, offset);
674
+ }
675
+
676
+ void StopWordsInvertedLists::prefetch_lists (
677
+ const idx_t *list_nos, int nlist) const
678
+ {
679
+ std::vector<idx_t> list0;
680
+ for (int i = 0; i < nlist; i++) {
681
+ idx_t list_no = list_nos[i];
682
+ if (list_no < 0) continue;
683
+ if (il0->list_size(list_no) < maxsize) {
684
+ list0.push_back(list_no);
685
+ }
686
+ }
687
+ il0->prefetch_lists (list0.data(), list0.size());
688
+ }
689
+
619
690
 
620
691
 
621
692
  } // namespace faiss
@@ -36,6 +36,10 @@ struct InvertedLists {
36
36
 
37
37
  InvertedLists (size_t nlist, size_t code_size);
38
38
 
39
+ /// used for BlockInvertedLists, where the codes are packed into groups
40
+ /// and the individual code size is meaningless
41
+ static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1);
42
+
39
43
  /*************************
40
44
  * Read only functions */
41
45
 
@@ -198,7 +202,7 @@ struct ArrayInvertedLists: InvertedLists {
198
202
 
199
203
  void resize (size_t list_no, size_t new_size) override;
200
204
 
201
- virtual ~ArrayInvertedLists ();
205
+ ~ArrayInvertedLists () override;
202
206
  };
203
207
 
204
208
  /*****************************************************************
@@ -329,6 +333,33 @@ struct MaskedInvertedLists: ReadOnlyInvertedLists {
329
333
 
330
334
  };
331
335
 
336
+
337
+ /** if the inverted list in il is smaller than maxsize then return it,
338
+ * otherwise return an empty invlist */
339
+ struct StopWordsInvertedLists: ReadOnlyInvertedLists {
340
+
341
+ const InvertedLists *il0;
342
+ size_t maxsize;
343
+
344
+ StopWordsInvertedLists (const InvertedLists *il, size_t maxsize);
345
+
346
+ size_t list_size(size_t list_no) const override;
347
+ const uint8_t * get_codes (size_t list_no) const override;
348
+ const idx_t * get_ids (size_t list_no) const override;
349
+
350
+ void release_codes (size_t list_no, const uint8_t *codes) const override;
351
+ void release_ids (size_t list_no, const idx_t *ids) const override;
352
+
353
+ idx_t get_single_id (size_t list_no, size_t offset) const override;
354
+
355
+ const uint8_t * get_single_code (
356
+ size_t list_no, size_t offset) const override;
357
+
358
+ void prefetch_lists (const idx_t *list_nos, int nlist) const override;
359
+
360
+ };
361
+
362
+
332
363
  } // namespace faiss
333
364
 
334
365
 
@@ -0,0 +1,107 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/invlists/InvertedListsIOHook.h>
9
+
10
+ #include <faiss/impl/io.h>
11
+ #include <faiss/impl/io_macros.h>
12
+ #include <faiss/impl/FaissAssert.h>
13
+
14
+ #include <faiss/invlists/BlockInvertedLists.h>
15
+
16
+ #ifndef _MSC_VER
17
+ #include <faiss/invlists/OnDiskInvertedLists.h>
18
+ #endif // !_MSC_VER
19
+
20
+
21
+ namespace faiss {
22
+
23
+
24
+ /**********************************************************
25
+ * InvertedListsIOHook's
26
+ **********************************************************/
27
+
28
+ InvertedListsIOHook::InvertedListsIOHook(
29
+ const std::string & key, const std::string & classname):
30
+ key(key), classname(classname)
31
+ {}
32
+
33
+ namespace {
34
+
35
+ /// std::vector that deletes its contents
36
+ struct IOHookTable: std::vector<InvertedListsIOHook*> {
37
+
38
+ IOHookTable() {
39
+ #ifndef _MSC_VER
40
+ push_back(new OnDiskInvertedListsIOHook());
41
+ #endif
42
+ push_back(new BlockInvertedListsIOHook());
43
+ }
44
+
45
+ ~IOHookTable() {
46
+ for (auto x: *this) {
47
+ delete x;
48
+ }
49
+ }
50
+ };
51
+
52
+ static IOHookTable InvertedListsIOHook_table;
53
+
54
+ } // anonymous namespace
55
+
56
+ InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
57
+ {
58
+ for(const auto & callback: InvertedListsIOHook_table) {
59
+ if (h == fourcc(callback->key)) {
60
+ return callback;
61
+ }
62
+ }
63
+ FAISS_THROW_FMT (
64
+ "read_InvertedLists: could not load ArrayInvertedLists as "
65
+ "%08x (\"%s\")", h, fourcc_inv_printable(h).c_str()
66
+ );
67
+ }
68
+
69
+ InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
70
+ {
71
+ for(const auto & callback: InvertedListsIOHook_table) {
72
+ if (callback->classname == classname) {
73
+ return callback;
74
+ }
75
+ }
76
+ FAISS_THROW_FMT (
77
+ "read_InvertedLists: could not find classname %s",
78
+ classname.c_str()
79
+ );
80
+ }
81
+
82
+ void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
83
+ {
84
+ InvertedListsIOHook_table.push_back(cb);
85
+ }
86
+
87
+ void InvertedListsIOHook::print_callbacks()
88
+ {
89
+ printf("registered %zd InvertedListsIOHooks:\n",
90
+ InvertedListsIOHook_table.size());
91
+ for(const auto & cb: InvertedListsIOHook_table) {
92
+ printf("%08x %s %s\n",
93
+ fourcc(cb->key.c_str()),
94
+ cb->key.c_str(),
95
+ cb->classname.c_str());
96
+ }
97
+ }
98
+
99
+ InvertedLists * InvertedListsIOHook::read_ArrayInvertedLists(
100
+ IOReader *, int ,
101
+ size_t , size_t ,
102
+ const std::vector<size_t> &) const
103
+ {
104
+ FAISS_THROW_FMT("read to array not implemented for %s", classname.c_str());
105
+ }
106
+
107
+ } // namespace faiss
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <string>
11
+ #include <faiss/invlists/InvertedLists.h>
12
+ #include <faiss/impl/io.h>
13
+
14
+
15
+ namespace faiss {
16
+
17
+
18
+ /** Callbacks to handle other types of InvertedList objects.
19
+ *
20
+ * The callbacks should be registered with add_callback before calling
21
+ * read_index or read_InvertedLists. The callbacks for
22
+ * OnDiskInvertedLists are registered by default. The invlist type is
23
+ * identified by:
24
+ *
25
+ * - the key (a fourcc) at read time
26
+ * - the class name (as given by typeid.name) at write time
27
+ */
28
+ struct InvertedListsIOHook {
29
+ const std::string key; ///< string version of the fourcc
30
+ const std::string classname; ///< typeid.name
31
+
32
+ InvertedListsIOHook(const std::string & key, const std::string & classname);
33
+
34
+ /// write the index to the IOWriter (including the fourcc)
35
+ virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
36
+
37
+ /// called when the fourcc matches this class's fourcc
38
+ virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
39
+
40
+ /** read from an ArrayInvertedLists into this invertedlist type.
41
+ * For this to work, the callback has to be enabled and the io_flag has to be set to
42
+ * IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
43
+ *
44
+ * (default implementation fails)
45
+ */
46
+ virtual InvertedLists * read_ArrayInvertedLists(
47
+ IOReader *f, int io_flags,
48
+ size_t nlist, size_t code_size,
49
+ const std::vector<size_t> &sizes) const;
50
+
51
+ virtual ~InvertedListsIOHook() {}
52
+
53
+ /**************************** Manage the set of callbacks ******/
54
+
55
+ // transfers ownership
56
+ static void add_callback(InvertedListsIOHook *);
57
+ static void print_callbacks();
58
+ static InvertedListsIOHook* lookup(int h);
59
+ static InvertedListsIOHook* lookup_classname(const std::string & classname);
60
+
61
+ };
62
+
63
+ } // namespace faiss
@@ -7,7 +7,7 @@
7
7
 
8
8
  // -*- c++ -*-
9
9
 
10
- #include <faiss/OnDiskInvertedLists.h>
10
+ #include <faiss/invlists/OnDiskInvertedLists.h>
11
11
 
12
12
  #include <pthread.h>
13
13
 
@@ -130,7 +130,8 @@ struct LockLevels {
130
130
 
131
131
  void print () {
132
132
  pthread_mutex_lock(&mutex1);
133
- printf("State: level3_in_use=%d n_level2=%d level1_holders: [", level3_in_use, n_level2);
133
+ printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
134
+ int(level3_in_use), n_level2);
134
135
  for (int k : level1_holders) {
135
136
  printf("%d ", k);
136
137
  }
@@ -299,8 +300,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
299
300
  // unmap file
300
301
  if (ptr != nullptr) {
301
302
  int err = munmap (ptr, totsize);
302
- FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
303
- strerror(errno));
303
+ FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s", strerror(errno));
304
304
  }
305
305
  if (totsize == 0) {
306
306
  // must create file before truncating it
@@ -516,8 +516,9 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
516
516
  if (it == slots.end()) {
517
517
  // not enough capacity
518
518
  size_t new_size = totsize == 0 ? 32 : totsize * 2;
519
- while (new_size - totsize < capacity)
519
+ while (new_size - totsize < capacity) {
520
520
  new_size *= 2;
521
+ }
521
522
  locks->lock_3 ();
522
523
  update_totsize(new_size);
523
524
  locks->unlock_3 ();
@@ -678,6 +679,18 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
678
679
  nlist = l1 - l0;
679
680
  }
680
681
 
682
+
683
+ void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
684
+ {
685
+ size_t ofs = 0;
686
+ for (size_t i = 0; i < nlist; i++) {
687
+ lists[i].offset = ofs;
688
+ lists[i].capacity = lists[i].size = sizes[i];
689
+ ofs += sizes[i] * (sizeof(idx_t) + code_size);
690
+ }
691
+
692
+ }
693
+
681
694
  /*******************************************************
682
695
  * I/O support via callbacks
683
696
  *******************************************************/
@@ -755,7 +768,9 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
755
768
 
756
769
  }
757
770
  READ1(od->totsize);
758
- od->do_mmap();
771
+ if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
772
+ od->do_mmap();
773
+ }
759
774
  return od;
760
775
  }
761
776
 
@@ -15,7 +15,7 @@
15
15
  #include <typeinfo>
16
16
 
17
17
  #include <faiss/IndexIVF.h>
18
-
18
+ #include <faiss/invlists/InvertedListsIOHook.h>
19
19
  #include <faiss/index_io.h>
20
20
 
21
21
  namespace faiss {
@@ -109,7 +109,7 @@ struct OnDiskInvertedLists: InvertedLists {
109
109
 
110
110
  void prefetch_lists (const idx_t *list_nos, int nlist) const override;
111
111
 
112
- virtual ~OnDiskInvertedLists ();
112
+ ~OnDiskInvertedLists () override;
113
113
 
114
114
  // private
115
115
 
@@ -126,6 +126,9 @@ struct OnDiskInvertedLists: InvertedLists {
126
126
  size_t allocate_slot (size_t capacity);
127
127
  void free_slot (size_t offset, size_t capacity);
128
128
 
129
+ /// override all list sizes and make a packed storage
130
+ void set_all_lists_sizes(const size_t *sizes);
131
+
129
132
  // empty constructor for the I/O functions
130
133
  OnDiskInvertedLists ();
131
134
  };
@@ -1,8 +1,15 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
1
8
  #pragma once
2
9
 
3
10
  #include "Python.h"
4
11
  #include <faiss/impl/io.h>
5
- #include <faiss/InvertedLists.h>
12
+ #include <faiss/invlists/InvertedLists.h>
6
13
 
7
14
  // all callbacks have to acquire the GIL on input
8
15