faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -7,7 +7,7 @@
|
|
7
7
|
|
8
8
|
// -*- c++ -*-
|
9
9
|
|
10
|
-
#include <faiss/InvertedLists.h>
|
10
|
+
#include <faiss/invlists/InvertedLists.h>
|
11
11
|
|
12
12
|
#include <cstdio>
|
13
13
|
|
@@ -616,6 +616,77 @@ void MaskedInvertedLists::prefetch_lists (
|
|
616
616
|
il1->prefetch_lists (list1.data(), list1.size());
|
617
617
|
}
|
618
618
|
|
619
|
+
/*****************************************
|
620
|
+
* StopWordsInvertedLists implementation
|
621
|
+
******************************************/
|
622
|
+
|
623
|
+
|
624
|
+
StopWordsInvertedLists::StopWordsInvertedLists (
|
625
|
+
const InvertedLists *il0, size_t maxsize):
|
626
|
+
ReadOnlyInvertedLists (il0->nlist, il0->code_size),
|
627
|
+
il0 (il0), maxsize (maxsize)
|
628
|
+
{
|
629
|
+
|
630
|
+
}
|
631
|
+
|
632
|
+
size_t StopWordsInvertedLists::list_size(size_t list_no) const
|
633
|
+
{
|
634
|
+
size_t sz = il0->list_size(list_no);
|
635
|
+
return sz < maxsize ? sz : 0;
|
636
|
+
}
|
637
|
+
|
638
|
+
const uint8_t * StopWordsInvertedLists::get_codes (size_t list_no) const
|
639
|
+
{
|
640
|
+
return il0->list_size(list_no) < maxsize ? il0->get_codes(list_no) : nullptr;
|
641
|
+
}
|
642
|
+
|
643
|
+
const idx_t * StopWordsInvertedLists::get_ids (size_t list_no) const
|
644
|
+
{
|
645
|
+
return il0->list_size(list_no) < maxsize ? il0->get_ids(list_no) : nullptr;
|
646
|
+
}
|
647
|
+
|
648
|
+
void StopWordsInvertedLists::release_codes (
|
649
|
+
size_t list_no, const uint8_t *codes) const
|
650
|
+
{
|
651
|
+
if (il0->list_size (list_no) < maxsize) {
|
652
|
+
il0->release_codes (list_no, codes);
|
653
|
+
}
|
654
|
+
}
|
655
|
+
|
656
|
+
void StopWordsInvertedLists::release_ids (size_t list_no, const idx_t *ids) const
|
657
|
+
{
|
658
|
+
if (il0->list_size (list_no) < maxsize) {
|
659
|
+
il0->release_ids (list_no, ids);
|
660
|
+
}
|
661
|
+
}
|
662
|
+
|
663
|
+
idx_t StopWordsInvertedLists::get_single_id (size_t list_no, size_t offset) const
|
664
|
+
{
|
665
|
+
FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
|
666
|
+
return il0->get_single_id (list_no, offset);
|
667
|
+
}
|
668
|
+
|
669
|
+
const uint8_t * StopWordsInvertedLists::get_single_code (
|
670
|
+
size_t list_no, size_t offset) const
|
671
|
+
{
|
672
|
+
FAISS_THROW_IF_NOT(il0->list_size (list_no) < maxsize);
|
673
|
+
return il0->get_single_code (list_no, offset);
|
674
|
+
}
|
675
|
+
|
676
|
+
void StopWordsInvertedLists::prefetch_lists (
|
677
|
+
const idx_t *list_nos, int nlist) const
|
678
|
+
{
|
679
|
+
std::vector<idx_t> list0;
|
680
|
+
for (int i = 0; i < nlist; i++) {
|
681
|
+
idx_t list_no = list_nos[i];
|
682
|
+
if (list_no < 0) continue;
|
683
|
+
if (il0->list_size(list_no) < maxsize) {
|
684
|
+
list0.push_back(list_no);
|
685
|
+
}
|
686
|
+
}
|
687
|
+
il0->prefetch_lists (list0.data(), list0.size());
|
688
|
+
}
|
689
|
+
|
619
690
|
|
620
691
|
|
621
692
|
} // namespace faiss
|
@@ -36,6 +36,10 @@ struct InvertedLists {
|
|
36
36
|
|
37
37
|
InvertedLists (size_t nlist, size_t code_size);
|
38
38
|
|
39
|
+
/// used for BlockInvertedLists, where the codes are packed into groups
|
40
|
+
/// and the individual code size is meaningless
|
41
|
+
static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1);
|
42
|
+
|
39
43
|
/*************************
|
40
44
|
* Read only functions */
|
41
45
|
|
@@ -198,7 +202,7 @@ struct ArrayInvertedLists: InvertedLists {
|
|
198
202
|
|
199
203
|
void resize (size_t list_no, size_t new_size) override;
|
200
204
|
|
201
|
-
|
205
|
+
~ArrayInvertedLists () override;
|
202
206
|
};
|
203
207
|
|
204
208
|
/*****************************************************************
|
@@ -329,6 +333,33 @@ struct MaskedInvertedLists: ReadOnlyInvertedLists {
|
|
329
333
|
|
330
334
|
};
|
331
335
|
|
336
|
+
|
337
|
+
/** if the inverted list in il is smaller than maxsize then return it,
|
338
|
+
* otherwise return an empty invlist */
|
339
|
+
struct StopWordsInvertedLists: ReadOnlyInvertedLists {
|
340
|
+
|
341
|
+
const InvertedLists *il0;
|
342
|
+
size_t maxsize;
|
343
|
+
|
344
|
+
StopWordsInvertedLists (const InvertedLists *il, size_t maxsize);
|
345
|
+
|
346
|
+
size_t list_size(size_t list_no) const override;
|
347
|
+
const uint8_t * get_codes (size_t list_no) const override;
|
348
|
+
const idx_t * get_ids (size_t list_no) const override;
|
349
|
+
|
350
|
+
void release_codes (size_t list_no, const uint8_t *codes) const override;
|
351
|
+
void release_ids (size_t list_no, const idx_t *ids) const override;
|
352
|
+
|
353
|
+
idx_t get_single_id (size_t list_no, size_t offset) const override;
|
354
|
+
|
355
|
+
const uint8_t * get_single_code (
|
356
|
+
size_t list_no, size_t offset) const override;
|
357
|
+
|
358
|
+
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
359
|
+
|
360
|
+
};
|
361
|
+
|
362
|
+
|
332
363
|
} // namespace faiss
|
333
364
|
|
334
365
|
|
@@ -0,0 +1,107 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
9
|
+
|
10
|
+
#include <faiss/impl/io.h>
|
11
|
+
#include <faiss/impl/io_macros.h>
|
12
|
+
#include <faiss/impl/FaissAssert.h>
|
13
|
+
|
14
|
+
#include <faiss/invlists/BlockInvertedLists.h>
|
15
|
+
|
16
|
+
#ifndef _MSC_VER
|
17
|
+
#include <faiss/invlists/OnDiskInvertedLists.h>
|
18
|
+
#endif // !_MSC_VER
|
19
|
+
|
20
|
+
|
21
|
+
namespace faiss {
|
22
|
+
|
23
|
+
|
24
|
+
/**********************************************************
|
25
|
+
* InvertedListIOHook's
|
26
|
+
**********************************************************/
|
27
|
+
|
28
|
+
InvertedListsIOHook::InvertedListsIOHook(
|
29
|
+
const std::string & key, const std::string & classname):
|
30
|
+
key(key), classname(classname)
|
31
|
+
{}
|
32
|
+
|
33
|
+
namespace {
|
34
|
+
|
35
|
+
/// std::vector that deletes its contents
|
36
|
+
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
37
|
+
|
38
|
+
IOHookTable() {
|
39
|
+
#ifndef _MSC_VER
|
40
|
+
push_back(new OnDiskInvertedListsIOHook());
|
41
|
+
#endif
|
42
|
+
push_back(new BlockInvertedListsIOHook());
|
43
|
+
}
|
44
|
+
|
45
|
+
~IOHookTable() {
|
46
|
+
for (auto x: *this) {
|
47
|
+
delete x;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
};
|
51
|
+
|
52
|
+
static IOHookTable InvertedListsIOHook_table;
|
53
|
+
|
54
|
+
} // anonymous namespace
|
55
|
+
|
56
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
57
|
+
{
|
58
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
59
|
+
if (h == fourcc(callback->key)) {
|
60
|
+
return callback;
|
61
|
+
}
|
62
|
+
}
|
63
|
+
FAISS_THROW_FMT (
|
64
|
+
"read_InvertedLists: could not load ArrayInvertedLists as "
|
65
|
+
"%08x (\"%s\")", h, fourcc_inv_printable(h).c_str()
|
66
|
+
);
|
67
|
+
}
|
68
|
+
|
69
|
+
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
|
70
|
+
{
|
71
|
+
for(const auto & callback: InvertedListsIOHook_table) {
|
72
|
+
if (callback->classname == classname) {
|
73
|
+
return callback;
|
74
|
+
}
|
75
|
+
}
|
76
|
+
FAISS_THROW_FMT (
|
77
|
+
"read_InvertedLists: could not find classname %s",
|
78
|
+
classname.c_str()
|
79
|
+
);
|
80
|
+
}
|
81
|
+
|
82
|
+
void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
|
83
|
+
{
|
84
|
+
InvertedListsIOHook_table.push_back(cb);
|
85
|
+
}
|
86
|
+
|
87
|
+
void InvertedListsIOHook::print_callbacks()
|
88
|
+
{
|
89
|
+
printf("registered %zd InvertedListsIOHooks:\n",
|
90
|
+
InvertedListsIOHook_table.size());
|
91
|
+
for(const auto & cb: InvertedListsIOHook_table) {
|
92
|
+
printf("%08x %s %s\n",
|
93
|
+
fourcc(cb->key.c_str()),
|
94
|
+
cb->key.c_str(),
|
95
|
+
cb->classname.c_str());
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
InvertedLists * InvertedListsIOHook::read_ArrayInvertedLists(
|
100
|
+
IOReader *, int ,
|
101
|
+
size_t , size_t ,
|
102
|
+
const std::vector<size_t> &) const
|
103
|
+
{
|
104
|
+
FAISS_THROW_FMT("read to array not implemented for %s", classname.c_str());
|
105
|
+
}
|
106
|
+
|
107
|
+
} // namespace faiss
|
@@ -0,0 +1,63 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include <string>
|
11
|
+
#include <faiss/invlists/InvertedLists.h>
|
12
|
+
#include <faiss/impl/io.h>
|
13
|
+
|
14
|
+
|
15
|
+
namespace faiss {
|
16
|
+
|
17
|
+
|
18
|
+
/** Callbacks to handle other types of InvertedList objects.
|
19
|
+
*
|
20
|
+
* The callbacks should be registered with add_callback before calling
|
21
|
+
* read_index or read_InvertedLists. The callbacks for
|
22
|
+
* OnDiskInvertedLists are registered by default. The invlist type is
|
23
|
+
* identified by:
|
24
|
+
*
|
25
|
+
* - the key (a fourcc) at read time
|
26
|
+
* - the class name (as given by typeid.name) at write time
|
27
|
+
*/
|
28
|
+
struct InvertedListsIOHook {
|
29
|
+
const std::string key; ///< string version of the fourcc
|
30
|
+
const std::string classname; ///< typeid.name
|
31
|
+
|
32
|
+
InvertedListsIOHook(const std::string & key, const std::string & classname);
|
33
|
+
|
34
|
+
/// write the index to the IOWriter (including the fourcc)
|
35
|
+
virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
|
36
|
+
|
37
|
+
/// called when the fourcc matches this class's fourcc
|
38
|
+
virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
|
39
|
+
|
40
|
+
/** read from an ArrayInvertedLists into this inverted list type.
|
41
|
+
* For this to work, the callback has to be enabled and the io_flag has to be set to
|
42
|
+
* IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
|
43
|
+
*
|
44
|
+
* (default implementation fails)
|
45
|
+
*/
|
46
|
+
virtual InvertedLists * read_ArrayInvertedLists(
|
47
|
+
IOReader *f, int io_flags,
|
48
|
+
size_t nlist, size_t code_size,
|
49
|
+
const std::vector<size_t> &sizes) const;
|
50
|
+
|
51
|
+
virtual ~InvertedListsIOHook() {}
|
52
|
+
|
53
|
+
/**************************** Manage the set of callbacks ******/
|
54
|
+
|
55
|
+
// transfers ownership
|
56
|
+
static void add_callback(InvertedListsIOHook *);
|
57
|
+
static void print_callbacks();
|
58
|
+
static InvertedListsIOHook* lookup(int h);
|
59
|
+
static InvertedListsIOHook* lookup_classname(const std::string & classname);
|
60
|
+
|
61
|
+
};
|
62
|
+
|
63
|
+
} // namespace faiss
|
@@ -7,7 +7,7 @@
|
|
7
7
|
|
8
8
|
// -*- c++ -*-
|
9
9
|
|
10
|
-
#include <faiss/OnDiskInvertedLists.h>
|
10
|
+
#include <faiss/invlists/OnDiskInvertedLists.h>
|
11
11
|
|
12
12
|
#include <pthread.h>
|
13
13
|
|
@@ -130,7 +130,8 @@ struct LockLevels {
|
|
130
130
|
|
131
131
|
void print () {
|
132
132
|
pthread_mutex_lock(&mutex1);
|
133
|
-
printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
|
133
|
+
printf("State: level3_in_use=%d n_level2=%d level1_holders: [",
|
134
|
+
int(level3_in_use), n_level2);
|
134
135
|
for (int k : level1_holders) {
|
135
136
|
printf("%d ", k);
|
136
137
|
}
|
@@ -299,8 +300,7 @@ void OnDiskInvertedLists::update_totsize (size_t new_size)
|
|
299
300
|
// unmap file
|
300
301
|
if (ptr != nullptr) {
|
301
302
|
int err = munmap (ptr, totsize);
|
302
|
-
FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s",
|
303
|
-
strerror(errno));
|
303
|
+
FAISS_THROW_IF_NOT_FMT (err == 0, "munmap error: %s", strerror(errno));
|
304
304
|
}
|
305
305
|
if (totsize == 0) {
|
306
306
|
// must create file before truncating it
|
@@ -516,8 +516,9 @@ size_t OnDiskInvertedLists::allocate_slot (size_t capacity) {
|
|
516
516
|
if (it == slots.end()) {
|
517
517
|
// not enough capacity
|
518
518
|
size_t new_size = totsize == 0 ? 32 : totsize * 2;
|
519
|
-
while (new_size - totsize < capacity)
|
519
|
+
while (new_size - totsize < capacity) {
|
520
520
|
new_size *= 2;
|
521
|
+
}
|
521
522
|
locks->lock_3 ();
|
522
523
|
update_totsize(new_size);
|
523
524
|
locks->unlock_3 ();
|
@@ -678,6 +679,18 @@ void OnDiskInvertedLists::crop_invlists(size_t l0, size_t l1)
|
|
678
679
|
nlist = l1 - l0;
|
679
680
|
}
|
680
681
|
|
682
|
+
|
683
|
+
void OnDiskInvertedLists::set_all_lists_sizes(const size_t *sizes)
|
684
|
+
{
|
685
|
+
size_t ofs = 0;
|
686
|
+
for (size_t i = 0; i < nlist; i++) {
|
687
|
+
lists[i].offset = ofs;
|
688
|
+
lists[i].capacity = lists[i].size = sizes[i];
|
689
|
+
ofs += sizes[i] * (sizeof(idx_t) + code_size);
|
690
|
+
}
|
691
|
+
|
692
|
+
}
|
693
|
+
|
681
694
|
/*******************************************************
|
682
695
|
* I/O support via callbacks
|
683
696
|
*******************************************************/
|
@@ -755,7 +768,9 @@ InvertedLists * OnDiskInvertedListsIOHook::read(IOReader *f, int io_flags) const
|
|
755
768
|
|
756
769
|
}
|
757
770
|
READ1(od->totsize);
|
758
|
-
|
771
|
+
if (!(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
772
|
+
od->do_mmap();
|
773
|
+
}
|
759
774
|
return od;
|
760
775
|
}
|
761
776
|
|
@@ -15,7 +15,7 @@
|
|
15
15
|
#include <typeinfo>
|
16
16
|
|
17
17
|
#include <faiss/IndexIVF.h>
|
18
|
-
|
18
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
19
19
|
#include <faiss/index_io.h>
|
20
20
|
|
21
21
|
namespace faiss {
|
@@ -109,7 +109,7 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
109
109
|
|
110
110
|
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
|
111
111
|
|
112
|
-
|
112
|
+
~OnDiskInvertedLists () override;
|
113
113
|
|
114
114
|
// private
|
115
115
|
|
@@ -126,6 +126,9 @@ struct OnDiskInvertedLists: InvertedLists {
|
|
126
126
|
size_t allocate_slot (size_t capacity);
|
127
127
|
void free_slot (size_t offset, size_t capacity);
|
128
128
|
|
129
|
+
/// override all list sizes and make a packed storage
|
130
|
+
void set_all_lists_sizes(const size_t *sizes);
|
131
|
+
|
129
132
|
// empty constructor for the I/O functions
|
130
133
|
OnDiskInvertedLists ();
|
131
134
|
};
|
@@ -1,8 +1,15 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
1
8
|
#pragma once
|
2
9
|
|
3
10
|
#include "Python.h"
|
4
11
|
#include <faiss/impl/io.h>
|
5
|
-
#include <faiss/InvertedLists.h>
|
12
|
+
#include <faiss/invlists/InvertedLists.h>
|
6
13
|
|
7
14
|
// all callbacks have to acquire the GIL on input
|
8
15
|
|