faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -11,90 +11,80 @@
|
|
|
11
11
|
|
|
12
12
|
#include <cstdio>
|
|
13
13
|
|
|
14
|
-
#include <faiss/utils/utils.h>
|
|
15
14
|
#include <faiss/impl/FaissAssert.h>
|
|
15
|
+
#include <faiss/utils/utils.h>
|
|
16
16
|
|
|
17
17
|
namespace faiss {
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
21
19
|
/*****************************************
|
|
22
20
|
* InvertedLists implementation
|
|
23
21
|
******************************************/
|
|
24
22
|
|
|
25
|
-
InvertedLists::InvertedLists
|
|
26
|
-
|
|
27
|
-
{
|
|
28
|
-
}
|
|
23
|
+
InvertedLists::InvertedLists(size_t nlist, size_t code_size)
|
|
24
|
+
: nlist(nlist), code_size(code_size) {}
|
|
29
25
|
|
|
30
|
-
InvertedLists::~InvertedLists
|
|
31
|
-
{}
|
|
26
|
+
InvertedLists::~InvertedLists() {}
|
|
32
27
|
|
|
33
|
-
InvertedLists::idx_t InvertedLists::get_single_id
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
assert (offset < list_size (list_no));
|
|
28
|
+
InvertedLists::idx_t InvertedLists::get_single_id(size_t list_no, size_t offset)
|
|
29
|
+
const {
|
|
30
|
+
assert(offset < list_size(list_no));
|
|
37
31
|
return get_ids(list_no)[offset];
|
|
38
32
|
}
|
|
39
33
|
|
|
34
|
+
void InvertedLists::release_codes(size_t, const uint8_t*) const {}
|
|
40
35
|
|
|
41
|
-
void InvertedLists::
|
|
42
|
-
{}
|
|
43
|
-
|
|
44
|
-
void InvertedLists::release_ids (size_t, const idx_t *) const
|
|
45
|
-
{}
|
|
36
|
+
void InvertedLists::release_ids(size_t, const idx_t*) const {}
|
|
46
37
|
|
|
47
|
-
void InvertedLists::prefetch_lists
|
|
48
|
-
{}
|
|
38
|
+
void InvertedLists::prefetch_lists(const idx_t*, int) const {}
|
|
49
39
|
|
|
50
|
-
const uint8_t
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
assert (offset < list_size (list_no));
|
|
40
|
+
const uint8_t* InvertedLists::get_single_code(size_t list_no, size_t offset)
|
|
41
|
+
const {
|
|
42
|
+
assert(offset < list_size(list_no));
|
|
54
43
|
return get_codes(list_no) + offset * code_size;
|
|
55
44
|
}
|
|
56
45
|
|
|
57
|
-
size_t InvertedLists::add_entry
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
46
|
+
size_t InvertedLists::add_entry(
|
|
47
|
+
size_t list_no,
|
|
48
|
+
idx_t theid,
|
|
49
|
+
const uint8_t* code) {
|
|
50
|
+
return add_entries(list_no, 1, &theid, code);
|
|
61
51
|
}
|
|
62
52
|
|
|
63
|
-
void InvertedLists::update_entry
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
53
|
+
void InvertedLists::update_entry(
|
|
54
|
+
size_t list_no,
|
|
55
|
+
size_t offset,
|
|
56
|
+
idx_t id,
|
|
57
|
+
const uint8_t* code) {
|
|
58
|
+
update_entries(list_no, offset, 1, &id, code);
|
|
67
59
|
}
|
|
68
60
|
|
|
69
|
-
void InvertedLists::reset
|
|
61
|
+
void InvertedLists::reset() {
|
|
70
62
|
for (size_t i = 0; i < nlist; i++) {
|
|
71
|
-
resize
|
|
63
|
+
resize(i, 0);
|
|
72
64
|
}
|
|
73
65
|
}
|
|
74
66
|
|
|
75
|
-
void InvertedLists::merge_from
|
|
76
|
-
|
|
67
|
+
void InvertedLists::merge_from(InvertedLists* oivf, size_t add_id) {
|
|
77
68
|
#pragma omp parallel for
|
|
78
69
|
for (idx_t i = 0; i < nlist; i++) {
|
|
79
|
-
size_t list_size = oivf->list_size
|
|
80
|
-
ScopedIds ids
|
|
70
|
+
size_t list_size = oivf->list_size(i);
|
|
71
|
+
ScopedIds ids(oivf, i);
|
|
81
72
|
if (add_id == 0) {
|
|
82
|
-
add_entries
|
|
83
|
-
ScopedCodes (oivf, i).get());
|
|
73
|
+
add_entries(i, list_size, ids.get(), ScopedCodes(oivf, i).get());
|
|
84
74
|
} else {
|
|
85
|
-
std::vector
|
|
75
|
+
std::vector<idx_t> new_ids(list_size);
|
|
86
76
|
|
|
87
77
|
for (size_t j = 0; j < list_size; j++) {
|
|
88
|
-
new_ids
|
|
78
|
+
new_ids[j] = ids[j] + add_id;
|
|
89
79
|
}
|
|
90
|
-
add_entries
|
|
91
|
-
|
|
80
|
+
add_entries(
|
|
81
|
+
i, list_size, new_ids.data(), ScopedCodes(oivf, i).get());
|
|
92
82
|
}
|
|
93
|
-
oivf->resize
|
|
83
|
+
oivf->resize(i, 0);
|
|
94
84
|
}
|
|
95
85
|
}
|
|
96
86
|
|
|
97
|
-
double InvertedLists::imbalance_factor
|
|
87
|
+
double InvertedLists::imbalance_factor() const {
|
|
98
88
|
std::vector<int> hist(nlist);
|
|
99
89
|
|
|
100
90
|
for (size_t i = 0; i < nlist; i++) {
|
|
@@ -104,7 +94,7 @@ double InvertedLists::imbalance_factor () const {
|
|
|
104
94
|
return faiss::imbalance_factor(nlist, hist.data());
|
|
105
95
|
}
|
|
106
96
|
|
|
107
|
-
void InvertedLists::print_stats
|
|
97
|
+
void InvertedLists::print_stats() const {
|
|
108
98
|
std::vector<int> sizes(40);
|
|
109
99
|
for (size_t i = 0; i < nlist; i++) {
|
|
110
100
|
for (size_t j = 0; j < sizes.size(); j++) {
|
|
@@ -121,7 +111,7 @@ void InvertedLists::print_stats () const {
|
|
|
121
111
|
}
|
|
122
112
|
}
|
|
123
113
|
|
|
124
|
-
size_t InvertedLists::compute_ntotal
|
|
114
|
+
size_t InvertedLists::compute_ntotal() const {
|
|
125
115
|
size_t tot = 0;
|
|
126
116
|
for (size_t i = 0; i < nlist; i++) {
|
|
127
117
|
tot += list_size(i);
|
|
@@ -133,195 +123,183 @@ size_t InvertedLists::compute_ntotal () const {
|
|
|
133
123
|
* ArrayInvertedLists implementation
|
|
134
124
|
******************************************/
|
|
135
125
|
|
|
136
|
-
ArrayInvertedLists::ArrayInvertedLists
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
size_t
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
{
|
|
147
|
-
if (n_entry == 0)
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
ids
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
126
|
+
ArrayInvertedLists::ArrayInvertedLists(size_t nlist, size_t code_size)
|
|
127
|
+
: InvertedLists(nlist, code_size) {
|
|
128
|
+
ids.resize(nlist);
|
|
129
|
+
codes.resize(nlist);
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
size_t ArrayInvertedLists::add_entries(
|
|
133
|
+
size_t list_no,
|
|
134
|
+
size_t n_entry,
|
|
135
|
+
const idx_t* ids_in,
|
|
136
|
+
const uint8_t* code) {
|
|
137
|
+
if (n_entry == 0)
|
|
138
|
+
return 0;
|
|
139
|
+
assert(list_no < nlist);
|
|
140
|
+
size_t o = ids[list_no].size();
|
|
141
|
+
ids[list_no].resize(o + n_entry);
|
|
142
|
+
memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
|
|
143
|
+
codes[list_no].resize((o + n_entry) * code_size);
|
|
144
|
+
memcpy(&codes[list_no][o * code_size], code, code_size * n_entry);
|
|
154
145
|
return o;
|
|
155
146
|
}
|
|
156
147
|
|
|
157
|
-
size_t ArrayInvertedLists::list_size(size_t list_no) const
|
|
158
|
-
|
|
159
|
-
assert (list_no < nlist);
|
|
148
|
+
size_t ArrayInvertedLists::list_size(size_t list_no) const {
|
|
149
|
+
assert(list_no < nlist);
|
|
160
150
|
return ids[list_no].size();
|
|
161
151
|
}
|
|
162
152
|
|
|
163
|
-
const uint8_t
|
|
164
|
-
|
|
165
|
-
assert (list_no < nlist);
|
|
153
|
+
const uint8_t* ArrayInvertedLists::get_codes(size_t list_no) const {
|
|
154
|
+
assert(list_no < nlist);
|
|
166
155
|
return codes[list_no].data();
|
|
167
156
|
}
|
|
168
157
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
{
|
|
172
|
-
assert (list_no < nlist);
|
|
158
|
+
const InvertedLists::idx_t* ArrayInvertedLists::get_ids(size_t list_no) const {
|
|
159
|
+
assert(list_no < nlist);
|
|
173
160
|
return ids[list_no].data();
|
|
174
161
|
}
|
|
175
162
|
|
|
176
|
-
void ArrayInvertedLists::resize
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
codes[list_no].resize (new_size * code_size);
|
|
163
|
+
void ArrayInvertedLists::resize(size_t list_no, size_t new_size) {
|
|
164
|
+
ids[list_no].resize(new_size);
|
|
165
|
+
codes[list_no].resize(new_size * code_size);
|
|
180
166
|
}
|
|
181
167
|
|
|
182
|
-
void ArrayInvertedLists::update_entries
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
168
|
+
void ArrayInvertedLists::update_entries(
|
|
169
|
+
size_t list_no,
|
|
170
|
+
size_t offset,
|
|
171
|
+
size_t n_entry,
|
|
172
|
+
const idx_t* ids_in,
|
|
173
|
+
const uint8_t* codes_in) {
|
|
174
|
+
assert(list_no < nlist);
|
|
175
|
+
assert(n_entry + offset <= ids[list_no].size());
|
|
176
|
+
memcpy(&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
|
|
177
|
+
memcpy(&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
|
|
190
178
|
}
|
|
191
179
|
|
|
192
|
-
|
|
193
|
-
ArrayInvertedLists::~ArrayInvertedLists ()
|
|
194
|
-
{}
|
|
180
|
+
ArrayInvertedLists::~ArrayInvertedLists() {}
|
|
195
181
|
|
|
196
182
|
/*****************************************************************
|
|
197
183
|
* Meta-inverted list implementations
|
|
198
184
|
*****************************************************************/
|
|
199
185
|
|
|
200
|
-
|
|
201
|
-
size_t
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
{
|
|
205
|
-
FAISS_THROW_MSG
|
|
186
|
+
size_t ReadOnlyInvertedLists::add_entries(
|
|
187
|
+
size_t,
|
|
188
|
+
size_t,
|
|
189
|
+
const idx_t*,
|
|
190
|
+
const uint8_t*) {
|
|
191
|
+
FAISS_THROW_MSG("not implemented");
|
|
206
192
|
}
|
|
207
193
|
|
|
208
|
-
void ReadOnlyInvertedLists::update_entries
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
194
|
+
void ReadOnlyInvertedLists::update_entries(
|
|
195
|
+
size_t,
|
|
196
|
+
size_t,
|
|
197
|
+
size_t,
|
|
198
|
+
const idx_t*,
|
|
199
|
+
const uint8_t*) {
|
|
200
|
+
FAISS_THROW_MSG("not implemented");
|
|
212
201
|
}
|
|
213
202
|
|
|
214
|
-
void ReadOnlyInvertedLists::resize
|
|
215
|
-
|
|
216
|
-
FAISS_THROW_MSG ("not implemented");
|
|
203
|
+
void ReadOnlyInvertedLists::resize(size_t, size_t) {
|
|
204
|
+
FAISS_THROW_MSG("not implemented");
|
|
217
205
|
}
|
|
218
206
|
|
|
219
|
-
|
|
220
|
-
|
|
221
207
|
/*****************************************
|
|
222
208
|
* HStackInvertedLists implementation
|
|
223
209
|
******************************************/
|
|
224
210
|
|
|
225
|
-
HStackInvertedLists::HStackInvertedLists
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
FAISS_THROW_IF_NOT (nil > 0);
|
|
211
|
+
HStackInvertedLists::HStackInvertedLists(int nil, const InvertedLists** ils_in)
|
|
212
|
+
: ReadOnlyInvertedLists(
|
|
213
|
+
nil > 0 ? ils_in[0]->nlist : 0,
|
|
214
|
+
nil > 0 ? ils_in[0]->code_size : 0) {
|
|
215
|
+
FAISS_THROW_IF_NOT(nil > 0);
|
|
231
216
|
for (int i = 0; i < nil; i++) {
|
|
232
|
-
ils.push_back
|
|
233
|
-
FAISS_THROW_IF_NOT
|
|
234
|
-
|
|
217
|
+
ils.push_back(ils_in[i]);
|
|
218
|
+
FAISS_THROW_IF_NOT(
|
|
219
|
+
ils_in[i]->code_size == code_size && ils_in[i]->nlist == nlist);
|
|
235
220
|
}
|
|
236
221
|
}
|
|
237
222
|
|
|
238
|
-
size_t HStackInvertedLists::list_size(size_t list_no) const
|
|
239
|
-
{
|
|
223
|
+
size_t HStackInvertedLists::list_size(size_t list_no) const {
|
|
240
224
|
size_t sz = 0;
|
|
241
225
|
for (int i = 0; i < ils.size(); i++) {
|
|
242
|
-
const InvertedLists
|
|
243
|
-
sz += il->list_size
|
|
226
|
+
const InvertedLists* il = ils[i];
|
|
227
|
+
sz += il->list_size(list_no);
|
|
244
228
|
}
|
|
245
229
|
return sz;
|
|
246
230
|
}
|
|
247
231
|
|
|
248
|
-
const uint8_t
|
|
249
|
-
|
|
250
|
-
uint8_t *codes = new uint8_t [code_size * list_size(list_no)], *c = codes;
|
|
232
|
+
const uint8_t* HStackInvertedLists::get_codes(size_t list_no) const {
|
|
233
|
+
uint8_t *codes = new uint8_t[code_size * list_size(list_no)], *c = codes;
|
|
251
234
|
|
|
252
235
|
for (int i = 0; i < ils.size(); i++) {
|
|
253
|
-
const InvertedLists
|
|
236
|
+
const InvertedLists* il = ils[i];
|
|
254
237
|
size_t sz = il->list_size(list_no) * code_size;
|
|
255
238
|
if (sz > 0) {
|
|
256
|
-
memcpy
|
|
239
|
+
memcpy(c, ScopedCodes(il, list_no).get(), sz);
|
|
257
240
|
c += sz;
|
|
258
241
|
}
|
|
259
242
|
}
|
|
260
243
|
return codes;
|
|
261
244
|
}
|
|
262
245
|
|
|
263
|
-
const uint8_t
|
|
264
|
-
|
|
265
|
-
{
|
|
246
|
+
const uint8_t* HStackInvertedLists::get_single_code(
|
|
247
|
+
size_t list_no,
|
|
248
|
+
size_t offset) const {
|
|
266
249
|
for (int i = 0; i < ils.size(); i++) {
|
|
267
|
-
const InvertedLists
|
|
268
|
-
size_t sz = il->list_size
|
|
250
|
+
const InvertedLists* il = ils[i];
|
|
251
|
+
size_t sz = il->list_size(list_no);
|
|
269
252
|
if (offset < sz) {
|
|
270
253
|
// here we have to copy the code, otherwise it will crash at dealloc
|
|
271
|
-
uint8_t
|
|
272
|
-
memcpy
|
|
254
|
+
uint8_t* code = new uint8_t[code_size];
|
|
255
|
+
memcpy(code, ScopedCodes(il, list_no, offset).get(), code_size);
|
|
273
256
|
return code;
|
|
274
257
|
}
|
|
275
258
|
offset -= sz;
|
|
276
259
|
}
|
|
277
|
-
FAISS_THROW_FMT
|
|
260
|
+
FAISS_THROW_FMT("offset %zd unknown", offset);
|
|
278
261
|
}
|
|
279
262
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
delete [] codes;
|
|
263
|
+
void HStackInvertedLists::release_codes(size_t, const uint8_t* codes) const {
|
|
264
|
+
delete[] codes;
|
|
283
265
|
}
|
|
284
266
|
|
|
285
|
-
const Index::idx_t
|
|
286
|
-
|
|
287
|
-
idx_t *ids = new idx_t [list_size(list_no)], *c = ids;
|
|
267
|
+
const Index::idx_t* HStackInvertedLists::get_ids(size_t list_no) const {
|
|
268
|
+
idx_t *ids = new idx_t[list_size(list_no)], *c = ids;
|
|
288
269
|
|
|
289
270
|
for (int i = 0; i < ils.size(); i++) {
|
|
290
|
-
const InvertedLists
|
|
271
|
+
const InvertedLists* il = ils[i];
|
|
291
272
|
size_t sz = il->list_size(list_no);
|
|
292
273
|
if (sz > 0) {
|
|
293
|
-
memcpy
|
|
274
|
+
memcpy(c, ScopedIds(il, list_no).get(), sz * sizeof(idx_t));
|
|
294
275
|
c += sz;
|
|
295
276
|
}
|
|
296
277
|
}
|
|
297
278
|
return ids;
|
|
298
279
|
}
|
|
299
280
|
|
|
300
|
-
Index::idx_t HStackInvertedLists::get_single_id
|
|
301
|
-
|
|
302
|
-
{
|
|
303
|
-
|
|
281
|
+
Index::idx_t HStackInvertedLists::get_single_id(size_t list_no, size_t offset)
|
|
282
|
+
const {
|
|
304
283
|
for (int i = 0; i < ils.size(); i++) {
|
|
305
|
-
const InvertedLists
|
|
306
|
-
size_t sz = il->list_size
|
|
284
|
+
const InvertedLists* il = ils[i];
|
|
285
|
+
size_t sz = il->list_size(list_no);
|
|
307
286
|
if (offset < sz) {
|
|
308
|
-
return il->get_single_id
|
|
287
|
+
return il->get_single_id(list_no, offset);
|
|
309
288
|
}
|
|
310
289
|
offset -= sz;
|
|
311
290
|
}
|
|
312
|
-
FAISS_THROW_FMT
|
|
291
|
+
FAISS_THROW_FMT("offset %zd unknown", offset);
|
|
313
292
|
}
|
|
314
293
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
delete [] ids;
|
|
294
|
+
void HStackInvertedLists::release_ids(size_t, const idx_t* ids) const {
|
|
295
|
+
delete[] ids;
|
|
318
296
|
}
|
|
319
297
|
|
|
320
|
-
void HStackInvertedLists::prefetch_lists
|
|
321
|
-
{
|
|
298
|
+
void HStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
299
|
+
const {
|
|
322
300
|
for (int i = 0; i < ils.size(); i++) {
|
|
323
|
-
const InvertedLists
|
|
324
|
-
il->prefetch_lists
|
|
301
|
+
const InvertedLists* il = ils[i];
|
|
302
|
+
il->prefetch_lists(list_nos, nlist);
|
|
325
303
|
}
|
|
326
304
|
}
|
|
327
305
|
|
|
@@ -329,203 +307,184 @@ void HStackInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) cons
|
|
|
329
307
|
* SliceInvertedLists implementation
|
|
330
308
|
******************************************/
|
|
331
309
|
|
|
332
|
-
|
|
333
310
|
namespace {
|
|
334
311
|
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
idx_t translate_list_no (const SliceInvertedLists *sil,
|
|
338
|
-
idx_t list_no) {
|
|
339
|
-
FAISS_THROW_IF_NOT (list_no >= 0 && list_no < sil->nlist);
|
|
340
|
-
return list_no + sil->i0;
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
};
|
|
344
|
-
|
|
312
|
+
using idx_t = InvertedLists::idx_t;
|
|
345
313
|
|
|
314
|
+
idx_t translate_list_no(const SliceInvertedLists* sil, idx_t list_no) {
|
|
315
|
+
FAISS_THROW_IF_NOT(list_no >= 0 && list_no < sil->nlist);
|
|
316
|
+
return list_no + sil->i0;
|
|
317
|
+
}
|
|
346
318
|
|
|
347
|
-
|
|
348
|
-
const InvertedLists *il, idx_t i0, idx_t i1):
|
|
349
|
-
ReadOnlyInvertedLists (i1 - i0, il->code_size),
|
|
350
|
-
il (il), i0(i0), i1(i1)
|
|
351
|
-
{
|
|
319
|
+
}; // namespace
|
|
352
320
|
|
|
353
|
-
|
|
321
|
+
SliceInvertedLists::SliceInvertedLists(
|
|
322
|
+
const InvertedLists* il,
|
|
323
|
+
idx_t i0,
|
|
324
|
+
idx_t i1)
|
|
325
|
+
: ReadOnlyInvertedLists(i1 - i0, il->code_size),
|
|
326
|
+
il(il),
|
|
327
|
+
i0(i0),
|
|
328
|
+
i1(i1) {}
|
|
354
329
|
|
|
355
|
-
size_t SliceInvertedLists::list_size(size_t list_no) const
|
|
356
|
-
|
|
357
|
-
return il->list_size (translate_list_no (this, list_no));
|
|
330
|
+
size_t SliceInvertedLists::list_size(size_t list_no) const {
|
|
331
|
+
return il->list_size(translate_list_no(this, list_no));
|
|
358
332
|
}
|
|
359
333
|
|
|
360
|
-
const uint8_t
|
|
361
|
-
|
|
362
|
-
return il->get_codes (translate_list_no (this, list_no));
|
|
334
|
+
const uint8_t* SliceInvertedLists::get_codes(size_t list_no) const {
|
|
335
|
+
return il->get_codes(translate_list_no(this, list_no));
|
|
363
336
|
}
|
|
364
337
|
|
|
365
|
-
const uint8_t
|
|
366
|
-
|
|
367
|
-
{
|
|
368
|
-
return il->get_single_code
|
|
338
|
+
const uint8_t* SliceInvertedLists::get_single_code(
|
|
339
|
+
size_t list_no,
|
|
340
|
+
size_t offset) const {
|
|
341
|
+
return il->get_single_code(translate_list_no(this, list_no), offset);
|
|
369
342
|
}
|
|
370
343
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
return il->release_codes (translate_list_no (this, list_no), codes);
|
|
344
|
+
void SliceInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
|
|
345
|
+
const {
|
|
346
|
+
return il->release_codes(translate_list_no(this, list_no), codes);
|
|
375
347
|
}
|
|
376
348
|
|
|
377
|
-
const Index::idx_t
|
|
378
|
-
|
|
379
|
-
return il->get_ids (translate_list_no (this, list_no));
|
|
349
|
+
const Index::idx_t* SliceInvertedLists::get_ids(size_t list_no) const {
|
|
350
|
+
return il->get_ids(translate_list_no(this, list_no));
|
|
380
351
|
}
|
|
381
352
|
|
|
382
|
-
Index::idx_t SliceInvertedLists::get_single_id
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
return il->get_single_id (translate_list_no (this, list_no), offset);
|
|
353
|
+
Index::idx_t SliceInvertedLists::get_single_id(size_t list_no, size_t offset)
|
|
354
|
+
const {
|
|
355
|
+
return il->get_single_id(translate_list_no(this, list_no), offset);
|
|
386
356
|
}
|
|
387
357
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
return il->release_ids (translate_list_no (this, list_no), ids);
|
|
358
|
+
void SliceInvertedLists::release_ids(size_t list_no, const idx_t* ids) const {
|
|
359
|
+
return il->release_ids(translate_list_no(this, list_no), ids);
|
|
391
360
|
}
|
|
392
361
|
|
|
393
|
-
void SliceInvertedLists::prefetch_lists
|
|
394
|
-
{
|
|
362
|
+
void SliceInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
363
|
+
const {
|
|
395
364
|
std::vector<idx_t> translated_list_nos;
|
|
396
365
|
for (int j = 0; j < nlist; j++) {
|
|
397
366
|
idx_t list_no = list_nos[j];
|
|
398
|
-
if (list_no < 0)
|
|
399
|
-
|
|
367
|
+
if (list_no < 0)
|
|
368
|
+
continue;
|
|
369
|
+
translated_list_nos.push_back(translate_list_no(this, list_no));
|
|
400
370
|
}
|
|
401
|
-
il->prefetch_lists
|
|
402
|
-
translated_list_nos.size());
|
|
371
|
+
il->prefetch_lists(translated_list_nos.data(), translated_list_nos.size());
|
|
403
372
|
}
|
|
404
373
|
|
|
405
|
-
|
|
406
374
|
/*****************************************
|
|
407
375
|
* VStackInvertedLists implementation
|
|
408
376
|
******************************************/
|
|
409
377
|
|
|
410
378
|
namespace {
|
|
411
379
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
i1 = imed;
|
|
426
|
-
}
|
|
380
|
+
using idx_t = InvertedLists::idx_t;
|
|
381
|
+
|
|
382
|
+
// find the invlist this number belongs to
|
|
383
|
+
int translate_list_no(const VStackInvertedLists* vil, idx_t list_no) {
|
|
384
|
+
FAISS_THROW_IF_NOT(list_no >= 0 && list_no < vil->nlist);
|
|
385
|
+
int i0 = 0, i1 = vil->ils.size();
|
|
386
|
+
const idx_t* cumsz = vil->cumsz.data();
|
|
387
|
+
while (i0 + 1 < i1) {
|
|
388
|
+
int imed = (i0 + i1) / 2;
|
|
389
|
+
if (list_no >= cumsz[imed]) {
|
|
390
|
+
i0 = imed;
|
|
391
|
+
} else {
|
|
392
|
+
i1 = imed;
|
|
427
393
|
}
|
|
428
|
-
assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
|
|
429
|
-
return i0;
|
|
430
394
|
}
|
|
395
|
+
assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
|
|
396
|
+
return i0;
|
|
397
|
+
}
|
|
431
398
|
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
}
|
|
437
|
-
return tot;
|
|
399
|
+
// Add up the `nlist` field of the first `nil` entries of ils_in.
idx_t sum_il_sizes(int nil, const InvertedLists** ils_in) {
    idx_t total = 0;
    int k = 0;
    while (k < nil) {
        total += ils_in[k]->nlist;
        ++k;
    }
    return total;
}
|
|
439
406
|
|
|
440
|
-
};
|
|
441
|
-
|
|
442
|
-
|
|
407
|
+
}; // namespace
|
|
443
408
|
|
|
444
|
-
VStackInvertedLists::VStackInvertedLists
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
cumsz.resize (nil + 1);
|
|
409
|
+
// Stack `nil` inverted-list objects one after the other.
// The base class receives the summed list count and the code size of the
// first entry (0 when nil <= 0, which is then rejected in the body).
// Every entry must have the same code_size; cumsz[i + 1] accumulates the list
// counts of entries 0..i.
VStackInvertedLists::VStackInvertedLists(int nil, const InvertedLists** ils_in)
        : ReadOnlyInvertedLists(
                  sum_il_sizes(nil, ils_in),
                  nil > 0 ? ils_in[0]->code_size : 0) {
    FAISS_THROW_IF_NOT(nil > 0);
    cumsz.resize(nil + 1);
    for (int i = 0; i < nil; ++i) {
        const InvertedLists* sub = ils_in[i];
        ils.push_back(sub);
        FAISS_THROW_IF_NOT(ils_in[i]->code_size == code_size);
        cumsz[i + 1] = cumsz[i] + sub->nlist;
    }
}
|
|
457
421
|
|
|
458
|
-
size_t VStackInvertedLists::list_size(size_t list_no) const
|
|
459
|
-
|
|
460
|
-
int i = translate_list_no (this, list_no);
|
|
422
|
+
size_t VStackInvertedLists::list_size(size_t list_no) const {
|
|
423
|
+
int i = translate_list_no(this, list_no);
|
|
461
424
|
list_no -= cumsz[i];
|
|
462
|
-
return ils[i]->list_size
|
|
425
|
+
return ils[i]->list_size(list_no);
|
|
463
426
|
}
|
|
464
427
|
|
|
465
|
-
const uint8_t
|
|
466
|
-
|
|
467
|
-
int i = translate_list_no (this, list_no);
|
|
428
|
+
const uint8_t* VStackInvertedLists::get_codes(size_t list_no) const {
|
|
429
|
+
int i = translate_list_no(this, list_no);
|
|
468
430
|
list_no -= cumsz[i];
|
|
469
|
-
return ils[i]->get_codes
|
|
431
|
+
return ils[i]->get_codes(list_no);
|
|
470
432
|
}
|
|
471
433
|
|
|
472
|
-
const uint8_t
|
|
473
|
-
|
|
474
|
-
{
|
|
475
|
-
int i = translate_list_no
|
|
434
|
+
const uint8_t* VStackInvertedLists::get_single_code(
|
|
435
|
+
size_t list_no,
|
|
436
|
+
size_t offset) const {
|
|
437
|
+
int i = translate_list_no(this, list_no);
|
|
476
438
|
list_no -= cumsz[i];
|
|
477
|
-
return ils[i]->get_single_code
|
|
439
|
+
return ils[i]->get_single_code(list_no, offset);
|
|
478
440
|
}
|
|
479
441
|
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
int i = translate_list_no (this, list_no);
|
|
442
|
+
void VStackInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
|
|
443
|
+
const {
|
|
444
|
+
int i = translate_list_no(this, list_no);
|
|
484
445
|
list_no -= cumsz[i];
|
|
485
|
-
return ils[i]->release_codes
|
|
446
|
+
return ils[i]->release_codes(list_no, codes);
|
|
486
447
|
}
|
|
487
448
|
|
|
488
|
-
const Index::idx_t
|
|
489
|
-
|
|
490
|
-
int i = translate_list_no (this, list_no);
|
|
449
|
+
// Ids of global list `list_no`, fetched from the owning sub-list using the
// list number rebased by cumsz.
const Index::idx_t* VStackInvertedLists::get_ids(size_t list_no) const {
    const int which = translate_list_no(this, list_no);
    return ils[which]->get_ids(list_no - cumsz[which]);
}
|
|
494
454
|
|
|
495
|
-
Index::idx_t VStackInvertedLists::get_single_id
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
int i = translate_list_no (this, list_no);
|
|
455
|
+
// Single id lookup by (list_no, offset), delegated to the owning sub-list
// with the list number rebased by cumsz.
Index::idx_t VStackInvertedLists::get_single_id(size_t list_no, size_t offset)
        const {
    const int which = translate_list_no(this, list_no);
    return ils[which]->get_single_id(list_no - cumsz[which], offset);
}
|
|
502
461
|
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
int i = translate_list_no (this, list_no);
|
|
462
|
+
void VStackInvertedLists::release_ids(size_t list_no, const idx_t* ids) const {
|
|
463
|
+
int i = translate_list_no(this, list_no);
|
|
506
464
|
list_no -= cumsz[i];
|
|
507
|
-
return ils[i]->release_ids
|
|
465
|
+
return ils[i]->release_ids(list_no, ids);
|
|
508
466
|
}
|
|
509
467
|
|
|
510
|
-
void VStackInvertedLists::prefetch_lists
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
std::vector<int>
|
|
514
|
-
std::vector<int> n_per_il (ils.size(), 0);
|
|
468
|
+
void VStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
469
|
+
const {
|
|
470
|
+
std::vector<int> ilno(nlist, -1);
|
|
471
|
+
std::vector<int> n_per_il(ils.size(), 0);
|
|
515
472
|
for (int j = 0; j < nlist; j++) {
|
|
516
473
|
idx_t list_no = list_nos[j];
|
|
517
|
-
if (list_no < 0)
|
|
518
|
-
|
|
474
|
+
if (list_no < 0)
|
|
475
|
+
continue;
|
|
476
|
+
int i = ilno[j] = translate_list_no(this, list_no);
|
|
519
477
|
n_per_il[i]++;
|
|
520
478
|
}
|
|
521
|
-
std::vector<int> cum_n_per_il
|
|
479
|
+
std::vector<int> cum_n_per_il(ils.size() + 1, 0);
|
|
522
480
|
for (int j = 0; j < ils.size(); j++) {
|
|
523
481
|
cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
|
|
524
482
|
}
|
|
525
|
-
std::vector<idx_t> sorted_list_nos
|
|
483
|
+
std::vector<idx_t> sorted_list_nos(cum_n_per_il.back());
|
|
526
484
|
for (int j = 0; j < nlist; j++) {
|
|
527
485
|
idx_t list_no = list_nos[j];
|
|
528
|
-
if (list_no < 0)
|
|
486
|
+
if (list_no < 0)
|
|
487
|
+
continue;
|
|
529
488
|
int i = ilno[j];
|
|
530
489
|
list_no -= cumsz[i];
|
|
531
490
|
sorted_list_nos[cum_n_per_il[i]++] = list_no;
|
|
@@ -535,158 +494,142 @@ void VStackInvertedLists::prefetch_lists (
|
|
|
535
494
|
for (int j = 0; j < ils.size(); j++) {
|
|
536
495
|
int i1 = i0 + n_per_il[j];
|
|
537
496
|
if (i1 > i0) {
|
|
538
|
-
ils[j]->prefetch_lists
|
|
539
|
-
i1 - i0);
|
|
497
|
+
ils[j]->prefetch_lists(sorted_list_nos.data() + i0, i1 - i0);
|
|
540
498
|
}
|
|
541
499
|
i0 = i1;
|
|
542
500
|
}
|
|
543
501
|
}
|
|
544
502
|
|
|
545
|
-
|
|
546
|
-
|
|
547
503
|
/*****************************************
|
|
548
504
|
* MaskedInvertedLists implementation
|
|
549
505
|
******************************************/
|
|
550
506
|
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
{
|
|
557
|
-
FAISS_THROW_IF_NOT
|
|
558
|
-
FAISS_THROW_IF_NOT
|
|
507
|
+
// Combine two inverted lists: the base class takes its list count and code
// size from il0, and il1 must match both (enforced by FAISS_THROW_IF_NOT).
MaskedInvertedLists::MaskedInvertedLists(
        const InvertedLists* il0,
        const InvertedLists* il1)
        : ReadOnlyInvertedLists(il0->nlist, il0->code_size),
          il0(il0),
          il1(il1) {
    // nlist / code_size here are the values the base class was built with
    FAISS_THROW_IF_NOT(il1->nlist == nlist);
    FAISS_THROW_IF_NOT(il1->code_size == code_size);
}
|
|
560
516
|
|
|
561
|
-
size_t MaskedInvertedLists::list_size(size_t list_no) const
|
|
562
|
-
{
|
|
517
|
+
size_t MaskedInvertedLists::list_size(size_t list_no) const {
|
|
563
518
|
size_t sz = il0->list_size(list_no);
|
|
564
519
|
return sz ? sz : il1->list_size(list_no);
|
|
565
520
|
}
|
|
566
521
|
|
|
567
|
-
const uint8_t
|
|
568
|
-
{
|
|
522
|
+
const uint8_t* MaskedInvertedLists::get_codes(size_t list_no) const {
|
|
569
523
|
size_t sz = il0->list_size(list_no);
|
|
570
524
|
return (sz ? il0 : il1)->get_codes(list_no);
|
|
571
525
|
}
|
|
572
526
|
|
|
573
|
-
const idx_t
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
return (sz ? il0 : il1)->get_ids (list_no);
|
|
527
|
+
// Ids of list `list_no`, taken from il0 when its list is non-empty and from
// il1 otherwise.
const idx_t* MaskedInvertedLists::get_ids(size_t list_no) const {
    if (il0->list_size(list_no) != 0) {
        return il0->get_ids(list_no);
    }
    return il1->get_ids(list_no);
}
|
|
578
531
|
|
|
579
|
-
void MaskedInvertedLists::release_codes
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
(sz ? il0 : il1)->release_codes (list_no, codes);
|
|
532
|
+
void MaskedInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
|
|
533
|
+
const {
|
|
534
|
+
size_t sz = il0->list_size(list_no);
|
|
535
|
+
(sz ? il0 : il1)->release_codes(list_no, codes);
|
|
584
536
|
}
|
|
585
537
|
|
|
586
|
-
void MaskedInvertedLists::release_ids
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
(sz ? il0 : il1)->release_ids (list_no, ids);
|
|
538
|
+
// Release `ids` on whichever of il0 / il1 serves list `list_no`
// (il0 when its list is non-empty, il1 otherwise).
void MaskedInvertedLists::release_ids(size_t list_no, const idx_t* ids) const {
    if (il0->list_size(list_no) != 0) {
        il0->release_ids(list_no, ids);
    } else {
        il1->release_ids(list_no, ids);
    }
}
|
|
591
542
|
|
|
592
|
-
idx_t MaskedInvertedLists::get_single_id
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
return (sz ? il0 : il1)->get_single_id (list_no, offset);
|
|
543
|
+
// Single id lookup by (list_no, offset): answered by il0 when its list is
// non-empty, by il1 otherwise.
idx_t MaskedInvertedLists::get_single_id(size_t list_no, size_t offset) const {
    if (il0->list_size(list_no) != 0) {
        return il0->get_single_id(list_no, offset);
    }
    return il1->get_single_id(list_no, offset);
}
|
|
597
547
|
|
|
598
|
-
const uint8_t
|
|
599
|
-
|
|
600
|
-
{
|
|
601
|
-
size_t sz = il0->list_size
|
|
602
|
-
return (sz ? il0 : il1)->get_single_code
|
|
548
|
+
const uint8_t* MaskedInvertedLists::get_single_code(
|
|
549
|
+
size_t list_no,
|
|
550
|
+
size_t offset) const {
|
|
551
|
+
size_t sz = il0->list_size(list_no);
|
|
552
|
+
return (sz ? il0 : il1)->get_single_code(list_no, offset);
|
|
603
553
|
}
|
|
604
554
|
|
|
605
|
-
void MaskedInvertedLists::prefetch_lists
|
|
606
|
-
|
|
607
|
-
{
|
|
555
|
+
void MaskedInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
556
|
+
const {
|
|
608
557
|
std::vector<idx_t> list0, list1;
|
|
609
558
|
for (int i = 0; i < nlist; i++) {
|
|
610
559
|
idx_t list_no = list_nos[i];
|
|
611
|
-
if (list_no < 0)
|
|
560
|
+
if (list_no < 0)
|
|
561
|
+
continue;
|
|
612
562
|
size_t sz = il0->list_size(list_no);
|
|
613
|
-
(sz ? list0 : list1).push_back
|
|
563
|
+
(sz ? list0 : list1).push_back(list_no);
|
|
614
564
|
}
|
|
615
|
-
il0->prefetch_lists
|
|
616
|
-
il1->prefetch_lists
|
|
565
|
+
il0->prefetch_lists(list0.data(), list0.size());
|
|
566
|
+
il1->prefetch_lists(list1.data(), list1.size());
|
|
617
567
|
}
|
|
618
568
|
|
|
619
569
|
/*****************************************
|
|
620
570
|
* StopWordsInvertedLists implementation
|
|
621
571
|
******************************************/
|
|
622
572
|
|
|
573
|
+
// Wrap il0, keeping its list count and code size, and remember the size
// threshold `maxsize` that the accessors compare list sizes against.
StopWordsInvertedLists::StopWordsInvertedLists(
        const InvertedLists* il0,
        size_t maxsize)
        : ReadOnlyInvertedLists(il0->nlist, il0->code_size),
          il0(il0),
          maxsize(maxsize) {
}
|
|
623
579
|
|
|
624
|
-
StopWordsInvertedLists::
|
|
625
|
-
const InvertedLists *il0, size_t maxsize):
|
|
626
|
-
ReadOnlyInvertedLists (il0->nlist, il0->code_size),
|
|
627
|
-
il0 (il0), maxsize (maxsize)
|
|
628
|
-
{
|
|
629
|
-
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
size_t StopWordsInvertedLists::list_size(size_t list_no) const
|
|
633
|
-
{
|
|
580
|
+
size_t StopWordsInvertedLists::list_size(size_t list_no) const {
|
|
634
581
|
size_t sz = il0->list_size(list_no);
|
|
635
582
|
return sz < maxsize ? sz : 0;
|
|
636
583
|
}
|
|
637
584
|
|
|
638
|
-
const uint8_t
|
|
639
|
-
|
|
640
|
-
|
|
585
|
+
const uint8_t* StopWordsInvertedLists::get_codes(size_t list_no) const {
|
|
586
|
+
return il0->list_size(list_no) < maxsize ? il0->get_codes(list_no)
|
|
587
|
+
: nullptr;
|
|
641
588
|
}
|
|
642
589
|
|
|
643
|
-
const idx_t
|
|
644
|
-
{
|
|
590
|
+
// Ids of list `list_no` from il0, or nullptr when il0's list size is not
// strictly below maxsize.
const idx_t* StopWordsInvertedLists::get_ids(size_t list_no) const {
    if (il0->list_size(list_no) < maxsize) {
        return il0->get_ids(list_no);
    }
    return nullptr;
}
|
|
647
593
|
|
|
648
|
-
void StopWordsInvertedLists::release_codes
|
|
649
|
-
|
|
650
|
-
{
|
|
651
|
-
|
|
652
|
-
il0->release_codes (list_no, codes);
|
|
594
|
+
void StopWordsInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
|
|
595
|
+
const {
|
|
596
|
+
if (il0->list_size(list_no) < maxsize) {
|
|
597
|
+
il0->release_codes(list_no, codes);
|
|
653
598
|
}
|
|
654
599
|
}
|
|
655
600
|
|
|
656
|
-
void StopWordsInvertedLists::release_ids
|
|
657
|
-
{
|
|
658
|
-
if (il0->list_size
|
|
659
|
-
il0->release_ids
|
|
601
|
+
void StopWordsInvertedLists::release_ids(size_t list_no, const idx_t* ids)
|
|
602
|
+
const {
|
|
603
|
+
if (il0->list_size(list_no) < maxsize) {
|
|
604
|
+
il0->release_ids(list_no, ids);
|
|
660
605
|
}
|
|
661
606
|
}
|
|
662
607
|
|
|
663
|
-
idx_t StopWordsInvertedLists::get_single_id
|
|
664
|
-
{
|
|
665
|
-
FAISS_THROW_IF_NOT(il0->list_size
|
|
666
|
-
return il0->get_single_id
|
|
608
|
+
// Single id lookup by (list_no, offset) on il0.
// FAISS_THROW_IF_NOT rejects lists whose size is not strictly below maxsize.
idx_t StopWordsInvertedLists::get_single_id(size_t list_no, size_t offset)
        const {
    FAISS_THROW_IF_NOT(il0->list_size(list_no) < maxsize);
    const idx_t found = il0->get_single_id(list_no, offset);
    return found;
}
|
|
668
613
|
|
|
669
|
-
const uint8_t
|
|
670
|
-
|
|
671
|
-
{
|
|
672
|
-
FAISS_THROW_IF_NOT(il0->list_size
|
|
673
|
-
return il0->get_single_code
|
|
614
|
+
const uint8_t* StopWordsInvertedLists::get_single_code(
|
|
615
|
+
size_t list_no,
|
|
616
|
+
size_t offset) const {
|
|
617
|
+
FAISS_THROW_IF_NOT(il0->list_size(list_no) < maxsize);
|
|
618
|
+
return il0->get_single_code(list_no, offset);
|
|
674
619
|
}
|
|
675
620
|
|
|
676
|
-
void StopWordsInvertedLists::prefetch_lists
|
|
677
|
-
|
|
678
|
-
{
|
|
621
|
+
void StopWordsInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
|
|
622
|
+
const {
|
|
679
623
|
std::vector<idx_t> list0;
|
|
680
624
|
for (int i = 0; i < nlist; i++) {
|
|
681
625
|
idx_t list_no = list_nos[i];
|
|
682
|
-
if (list_no < 0)
|
|
626
|
+
if (list_no < 0)
|
|
627
|
+
continue;
|
|
683
628
|
if (il0->list_size(list_no) < maxsize) {
|
|
684
629
|
list0.push_back(list_no);
|
|
685
630
|
}
|
|
686
631
|
}
|
|
687
|
-
il0->prefetch_lists
|
|
632
|
+
il0->prefetch_lists(list0.data(), list0.size());
|
|
688
633
|
}
|
|
689
634
|
|
|
690
|
-
|
|
691
|
-
|
|
692
635
|
} // namespace faiss
|