faiss 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE.txt +1 -1
- data/README.md +16 -4
- data/ext/faiss/ext.cpp +12 -308
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/index.cpp +189 -0
- data/ext/faiss/index_binary.cpp +75 -0
- data/ext/faiss/kmeans.cpp +40 -0
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +33 -0
- data/ext/faiss/product_quantizer.cpp +53 -0
- data/ext/faiss/utils.cpp +13 -0
- data/ext/faiss/utils.h +5 -0
- data/lib/faiss.rb +0 -5
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +54 -149
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
|
-
#include <faiss/InvertedLists.h>
|
|
10
|
+
#include <faiss/invlists/InvertedLists.h>
|
|
11
11
|
|
|
12
12
|
#include <cstdio>
|
|
13
13
|
|
|
@@ -616,6 +616,77 @@ void MaskedInvertedLists::prefetch_lists (
|
|
|
616
616
|
il1->prefetch_lists (list1.data(), list1.size());
|
|
617
617
|
}
|
|
618
618
|
|
|
619
|
+
/*****************************************
|
|
620
|
+
* StopWordsInvertedLists implementation
|
|
621
|
+
******************************************/
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
StopWordsInvertedLists::StopWordsInvertedLists (
|
|
625
|
+
const InvertedLists *il0, size_t maxsize):
|
|
626
|
+
ReadOnlyInvertedLists (il0->nlist, il0->code_size),
|
|
627
|
+
il0 (il0), maxsize (maxsize)
|
|
628
|
+
{
|
|
629
|
+
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
size_t StopWordsInvertedLists::list_size(size_t list_no) const
|
|
633
|
+
{
|
|
634
|
+
size_t sz = il0->list_size(list_no);
|
|
635
|
+
return sz < maxsize ? sz : 0;
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
const uint8_t * StopWordsInvertedLists::get_codes (size_t list_no) const
|
|
639
|
+
{
|
|
640
|
+
return il0->list_size(list_no) < maxsize ? il0->get_codes(list_no) : nullptr;
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
const idx_t * StopWordsInvertedLists::get_ids (size_t list_no) const
|
|
644
|
+
{
|
|
645
|
+
return il0->list_size(list_no) < maxsize ? il0->get_ids(list_no) : nullptr;
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
void StopWordsInvertedLists::release_codes (
|
|
649
|
+
size_t list_no, const uint8_t *codes) const
|
|
650
|
+
{
|
|
651
|
+
if (il0->list_size (list_no) < maxsize) {
|
|
652
|
+
il0->release_codes (list_no, codes);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
void StopWordsInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
|
|
657
|
+
{
|
|
658
|
+
if (il0->list_size (list_no) < maxsize) {
|
|
659
|
+
il0->release_ids (list_no, ids);
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
idx_t StopWordsInvertedLists::get_single_id (size_t list_no, size_t offset) const
|
|
664
|
+
{
|
|
665
|
+
FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
|
|
666
|
+
return il0->get_single_id (list_no, offset);
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
const uint8_t * StopWordsInvertedLists::get_single_code (
|
|
670
|
+
size_t list_no, size_t offset) const
|
|
671
|
+
{
|
|
672
|
+
FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
|
|
673
|
+
return il0->get_single_code (list_no, offset);
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
void StopWordsInvertedLists::prefetch_lists (
|
|
677
|
+
const idx_t *list_nos, int nlist) const
|
|
678
|
+
{
|
|
679
|
+
std::vector<idx_t> list0;
|
|
680
|
+
for (int i = 0; i < nlist; i++) {
|
|
681
|
+
idx_t list_no = list_nos[i];
|
|
682
|
+
if (list_no < 0) continue;
|
|
683
|
+
if (il0->list_size(list_no) < maxsize) {
|
|
684
|
+
list0.push_back(list_no);
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
il0->prefetch_lists (list0.data(), list0.size());
|
|
688
|
+
}
|
|
689
|
+
|
|
619
690
|
|
|
620
691
|
|
|
621
692
|
} // namespace faiss
|
|
@@ -36,6 +36,10 @@ struct InvertedLists {
|
|
|
36
36
|
|
|
37
37
|
InvertedLists (size_t nlist, size_t code_size);
|
|
38
38
|
|
|
39
|
+
/// used for BlockInvertedLists, where the codes are packed into groups
|
|
40
|
+
/// and the individual code size is meaningless
|
|
41
|
+
static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1);
|
|
42
|
+
|
|
39
43
|
/*************************
|
|
40
44
|
* Read only functions */
|
|
41
45
|
|
|
@@ -198,7 +202,7 @@ struct ArrayInvertedLists: InvertedLists {
|
|
|
198
202
|
|
|
199
203
|
void resize (size_t list_no, size_t new_size) override;
|
|
200
204
|
|
|
201
|
-
|
|
205
|
+
~ArrayInvertedLists () override;
|
|
202
206
|
};
|
|
203
207
|
|
|
204
208
|
/*****************************************************************
|
|
@@ -329,6 +333,33 @@ struct MaskedInvertedLists: ReadOnlyInvertedLists {
|
|
|
329
333
|
|
|
330
334
|
};
|
|
331
335
|
|
|
336
|
+
|
|
337
|
+
/** if the inverted list in il is smaller than maxsize then return it,
|
|
338
|
+
* otherwise return an empty invlist */
|
|
339
|
+
struct StopWordsInvertedLists: ReadOnlyInvertedLists {
|
|
340
|
+
|
|
341
|
+
const InvertedLists *il0;
|
|
342
|
+
size_t maxsize;
|
|
343
|
+
|
|
344
|
+
StopWordsInvertedLists (const InvertedLists *il, size_t maxsize);
|
|
345
|
+
|
|
346
|
+
size_t list_size(size_t list_no) const override;
|
|
347
|
+
const uint8_t * get_codes (size_t list_no) const override;
|
|
348
|
+
const idx_t * get_ids (size_t list_no) const override;
|
|
349
|
+
|
|
350
|
+
void release_codes (size_t list_no, const uint8_t *codes) const override;
|
|
351
|
+
void release_ids (size_t list_no, const idx_t *ids) const override;
|
|
352
|
+
|
|
353
|
+
idx_t get_single_id (size_t list_no, size_t offset) const override;
|
|
354
|
+
|
|
355
|
+
const uint8_t * get_single_code (
|
|
356
|
+
size_t list_no, size_t offset) const override;
|
|
357
|
+
|
|
358
|
+
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
|
359
|
+
|
|
360
|
+
};
|
|
361
|
+
|
|
362
|
+
|
|
332
363
|
} // namespace faiss
|
|
333
364
|
|
|
334
365
|
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
|
9
|
+
|
|
10
|
+
#include <faiss/impl/io.h>
|
|
11
|
+
#include <faiss/impl/io_macros.h>
|
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
|
13
|
+
|
|
14
|
+
#include <faiss/invlists/BlockInvertedLists.h>
|
|
15
|
+
|
|
16
|
+
#ifndef _MSC_VER
|
|
17
|
+
#include <faiss/invlists/OnDiskInvertedLists.h>
|
|
18
|
+
#endif // !_MSC_VER
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
namespace faiss {
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**********************************************************
|
|
25
|
+
* InvertedListsIOHook's
|
|
26
|
+
**********************************************************/
|
|
27
|
+
|
|
28
|
+
InvertedListsIOHook::InvertedListsIOHook(
|
|
29
|
+
const std::string & key, const std::string & classname):
|
|
30
|
+
key(key), classname(classname)
|
|
31
|
+
{}
|
|
32
|
+
|
|
33
|
+
namespace {
|
|
34
|
+
|
|
35
|
+
/// std::vector that deletes its contents
|
|
36
|
+
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
|
37
|
+
|
|
38
|
+
IOHookTable() {
|
|
39
|
+
#ifndef _MSC_VER
|
|
40
|
+
push_back(new OnDiskInvertedListsIOHook());
|
|
41
|
+
#endif
|
|
42
|
+
push_back(new BlockInvertedListsIOHook());
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
~IOHookTable() {
|
|
46
|
+
for (auto x: *this) {
|
|
47
|
+
delete x;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
static IOHookTable InvertedListsIOHook_table;
|
|
53
|
+
|
|
54
|
+
} // anonymous namespace
|
|
55
|
+
|
|
56
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
|
57
|
+
{
|
|
58
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
|
59
|
+
if (h == fourcc(callback->key)) {
|
|
60
|
+
return callback;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
FAISS_THROW_FMT (
|
|
64
|
+
"read_InvertedLists: could not load ArrayInvertedLists as "
|
|
65
|
+
"%08x (\"%s\")", h, fourcc_inv_printable(h).c_str()
|
|
66
|
+
);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
|
|
70
|
+
{
|
|
71
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
|
72
|
+
if (callback->classname == classname) {
|
|
73
|
+
return callback;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
FAISS_THROW_FMT (
|
|
77
|
+
"read_InvertedLists: could not find classname %s",
|
|
78
|
+
classname.c_str()
|
|
79
|
+
);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
|
|
83
|
+
{
|
|
84
|
+
InvertedListsIOHook_table.push_back(cb);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
void InvertedListsIOHook::print_callbacks()
|
|
88
|
+
{
|
|
89
|
+
printf("registered %zd InvertedListsIOHooks:\n",
|
|
90
|
+
InvertedListsIOHook_table.size());
|
|
91
|
+
for(const auto & cb: InvertedListsIOHook_table) {
|
|
92
|
+
printf("%08x %s %s\n",
|
|
93
|
+
fourcc(cb->key.c_str()),
|
|
94
|
+
cb->key.c_str(),
|
|
95
|
+
cb->classname.c_str());
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
InvertedLists * InvertedListsIOHook::read_ArrayInvertedLists(
|
|
100
|
+
IOReader *, int ,
|
|
101
|
+
size_t , size_t ,
|
|
102
|
+
const std::vector<size_t> &) const
|
|
103
|
+
{
|
|
104
|
+
FAISS_THROW_FMT("read to array not implemented for %s", classname.c_str());
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
} // namespace faiss
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <string>
|
|
11
|
+
#include <faiss/invlists/InvertedLists.h>
|
|
12
|
+
#include <faiss/impl/io.h>
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
namespace faiss {
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
/** Callbacks to handle other types of InvertedList objects.
|
|
19
|
+
*
|
|
20
|
+
* The callbacks should be registered with add_callback before calling
|
|
21
|
+
* read_index or read_InvertedLists. The callbacks for
|
|
22
|
+
* OnDiskInvertedLists are registered by default. The invlist type is
|
|
23
|
+
* identified by:
|
|
24
|
+
*
|
|
25
|
+
* - the key (a fourcc) at read time
|
|
26
|
+
* - the class name (as given by typeid.name) at write time
|
|
27
|
+
*/
|
|
28
|
+
struct InvertedListsIOHook {
|
|
29
|
+
const std::string key; ///< string version of the fourcc
|
|
30
|
+
const std::string classname; ///< typeid.name
|
|
31
|
+
|
|
32
|
+
InvertedListsIOHook(const std::string & key, const std::string & classname);
|
|
33
|
+
|
|
34
|
+
/// write the index to the IOWriter (including the fourcc)
|
|
35
|
+
virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
|
|
36
|
+
|
|
37
|
+
/// called when the fourcc matches this class's fourcc
|
|
38
|
+
virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
|
|
39
|
+
|
|
40
|
+
/** read from an ArrayInvertedLists into this invertedlist type.
|
|
41
|
+
* For this to work, the callback has to be enabled and the io_flag has to be set to
|
|
42
|
+
* IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
|
|
43
|
+
*
|
|
44
|
+
* (default implementation fails)
|
|
45
|
+
*/
|
|
46
|
+
virtual InvertedLists * read_ArrayInvertedLists(
|
|
47
|
+
IOReader *f, int io_flags,
|
|
48
|
+
size_t nlist, size_t code_size,
|
|
49
|
+
const std::vector<size_t> &sizes) const;
|
|
50
|
+
|
|
51
|
+
virtual ~InvertedListsIOHook() {}
|
|
52
|
+
|
|
53
|
+
/**************************** Manage the set of callbacks ******/
|
|
54
|
+
|
|
55
|
+
// transfers ownership
|
|
56
|
+
static void add_callback(InvertedListsIOHook *);
|
|
57
|
+
static void print_callbacks();
|
|
58
|
+
static InvertedListsIOHook* lookup(int h);
|
|
59
|
+
static InvertedListsIOHook* lookup_classname(const std::string & classname);
|
|
60
|
+
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
} // namespace faiss
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
// -*- c++ -*-
|
|
9
9
|
|
|
10
|
-
#include <faiss/OnDiskInvertedLists.h>
|
|
10
|
+
#include <faiss/invlists/OnDiskInvertedLists.h>
|
|
11
11
|
|
|
12
12
|
#include <pthread.h>
|
|
13
13
|
|
|
@@ -130,7 +130,8 @@ struct LockLevels {
|
|
|
130
130
|
|
|
131
131
|
void print () {
|
|
132
132
|
pthread_mutex_lock(&mutex1);
|
|
133
|
-
printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
|
|
133
|
+
printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
|
|
134
|
+
int(level3_in_use), n_level2);
|
|
134
135
|
for (int k : level1_holders) {
|
|
135
136
|
printf("%d ", k);
|
|
136
137
|
}
|
|
@@ -299,8 +300,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
|
299
300
|
// unmap file
|
|
300
301
|
if (ptr != nullptr) {
|
|
301
302
|
int err = munmap (ptr, totsize);
|
|
302
|
-
FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
|
|
303
|
-
strerror(errno));
|
|
303
|
+
FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s", strerror(errno));
|
|
304
304
|
}
|
|
305
305
|
if (totsize == 0) {
|
|
306
306
|
// must create file before truncating it
|
|
@@ -516,8 +516,9 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
|
|
|
516
516
|
if (it == slots.end()) {
|
|
517
517
|
// not enough capacity
|
|
518
518
|
size_t new_size = totsize == 0 ? 32 : totsize * 2;
|
|
519
|
-
while (new_size - totsize < capacity)
|
|
519
|
+
while (new_size - totsize < capacity) {
|
|
520
520
|
new_size *= 2;
|
|
521
|
+
}
|
|
521
522
|
locks->lock_3 ();
|
|
522
523
|
update_totsize(new_size);
|
|
523
524
|
locks->unlock_3 ();
|
|
@@ -678,6 +679,18 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
|
|
678
679
|
nlist = l1 - l0;
|
|
679
680
|
}
|
|
680
681
|
|
|
682
|
+
|
|
683
|
+
void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
|
|
684
|
+
{
|
|
685
|
+
size_t ofs = 0;
|
|
686
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
687
|
+
lists[i].offset = ofs;
|
|
688
|
+
lists[i].capacity = lists[i].size = sizes[i];
|
|
689
|
+
ofs += sizes[i] * (sizeof(idx_t) + code_size);
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
}
|
|
693
|
+
|
|
681
694
|
/*******************************************************
|
|
682
695
|
* I/O support via callbacks
|
|
683
696
|
*******************************************************/
|
|
@@ -755,7 +768,9 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
|
|
755
768
|
|
|
756
769
|
}
|
|
757
770
|
READ1(od->totsize);
|
|
758
|
-
|
|
771
|
+
if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
772
|
+
od->do_mmap();
|
|
773
|
+
}
|
|
759
774
|
return od;
|
|
760
775
|
}
|
|
761
776
|
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
#include <typeinfo>
|
|
16
16
|
|
|
17
17
|
#include <faiss/IndexIVF.h>
|
|
18
|
-
|
|
18
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
|
19
19
|
#include <faiss/index_io.h>
|
|
20
20
|
|
|
21
21
|
namespace faiss {
|
|
@@ -109,7 +109,7 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
|
109
109
|
|
|
110
110
|
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
|
111
111
|
|
|
112
|
-
|
|
112
|
+
~OnDiskInvertedLists () override;
|
|
113
113
|
|
|
114
114
|
// private
|
|
115
115
|
|
|
@@ -126,6 +126,9 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
|
126
126
|
size_t allocate_slot (size_t capacity);
|
|
127
127
|
void free_slot (size_t offset, size_t capacity);
|
|
128
128
|
|
|
129
|
+
/// override all list sizes and make a packed storage
|
|
130
|
+
void set_all_lists_sizes(const size_t *sizes);
|
|
131
|
+
|
|
129
132
|
// empty constructor for the I/O functions
|
|
130
133
|
OnDiskInvertedLists ();
|
|
131
134
|
};
|
|
@@ -1,8 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
1
8
|
#pragma once
|
|
2
9
|
|
|
3
10
|
#include "Python.h"
|
|
4
11
|
#include <faiss/impl/io.h>
|
|
5
|
-
#include <faiss/InvertedLists.h>
|
|
12
|
+
#include <faiss/invlists/InvertedLists.h>
|
|
6
13
|
|
|
7
14
|
// all callbacks have to acquire the GIL on input
|
|
8
15
|
|