faiss 0.2.0 → 0.2.4
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
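
The vendored faiss sources were upgraded in this release, and several files are brand new (added with no removed lines), including IndexNSG.cpp, IndexNNDescent.cpp, IndexAdditiveQuantizer.cpp, impl/NSG.cpp, impl/NNDescent.cpp, impl/ResidualQuantizer.cpp, impl/LocalSearchQuantizer.cpp, impl/kmeans1d.cpp and utils/simdlib_neon.h. As a rough illustration of the graph-based NSG index that some of these new files implement, here is a minimal C++ sketch; the dimensionality, database size and graph degree are hypothetical, and it assumes the vendored faiss library is built and linked (it is not taken from the gem itself):

#include <faiss/IndexNSG.h>

#include <random>
#include <vector>

int main() {
    int d = 64;      // vector dimensionality (hypothetical)
    int nb = 10000;  // database size (hypothetical)

    // random database vectors, purely for illustration
    std::mt19937 rng(12345);
    std::uniform_real_distribution<float> dis;
    std::vector<float> xb((size_t)nb * d);
    for (auto& x : xb) {
        x = dis(rng);
    }

    // NSG graph index over flat (uncompressed) storage; 32 is the graph degree
    faiss::IndexNSGFlat index(d, 32);
    index.add(nb, xb.data());

    // query with the first database vector, ask for 5 nearest neighbors
    int k = 5;
    std::vector<faiss::Index::idx_t> labels(k);
    std::vector<float> distances(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}
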
data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
@@ -11,90 +11,80 @@
 
 #include <cstdio>
 
 #include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/utils.h>
 
 namespace faiss {
 
 /*****************************************
  * InvertedLists implementation
  ******************************************/
 
+InvertedLists::InvertedLists(size_t nlist, size_t code_size)
+        : nlist(nlist), code_size(code_size) {}
 
+InvertedLists::~InvertedLists() {}
 
+InvertedLists::idx_t InvertedLists::get_single_id(size_t list_no, size_t offset)
+        const {
+    assert(offset < list_size(list_no));
     return get_ids(list_no)[offset];
 }
 
+void InvertedLists::release_codes(size_t, const uint8_t*) const {}
 
+void InvertedLists::release_ids(size_t, const idx_t*) const {}
 
+void InvertedLists::prefetch_lists(const idx_t*, int) const {}
 
+const uint8_t* InvertedLists::get_single_code(size_t list_no, size_t offset)
+        const {
+    assert(offset < list_size(list_no));
     return get_codes(list_no) + offset * code_size;
 }
 
+size_t InvertedLists::add_entry(
+        size_t list_no,
+        idx_t theid,
+        const uint8_t* code) {
+    return add_entries(list_no, 1, &theid, code);
 }
 
+void InvertedLists::update_entry(
+        size_t list_no,
+        size_t offset,
+        idx_t id,
+        const uint8_t* code) {
+    update_entries(list_no, offset, 1, &id, code);
 }
 
+void InvertedLists::reset() {
     for (size_t i = 0; i < nlist; i++) {
+        resize(i, 0);
     }
 }
 
+void InvertedLists::merge_from(InvertedLists* oivf, size_t add_id) {
 #pragma omp parallel for
     for (idx_t i = 0; i < nlist; i++) {
+        size_t list_size = oivf->list_size(i);
+        ScopedIds ids(oivf, i);
         if (add_id == 0) {
+            add_entries(i, list_size, ids.get(), ScopedCodes(oivf, i).get());
         } else {
+            std::vector<idx_t> new_ids(list_size);
 
             for (size_t j = 0; j < list_size; j++) {
+                new_ids[j] = ids[j] + add_id;
             }
+            add_entries(
+                    i, list_size, new_ids.data(), ScopedCodes(oivf, i).get());
         }
+        oivf->resize(i, 0);
     }
 }
 
+double InvertedLists::imbalance_factor() const {
     std::vector<int> hist(nlist);
 
     for (size_t i = 0; i < nlist; i++) {
@@ -104,7 +94,7 @@ double InvertedLists::imbalance_factor () const {
     return faiss::imbalance_factor(nlist, hist.data());
 }
 
+void InvertedLists::print_stats() const {
     std::vector<int> sizes(40);
     for (size_t i = 0; i < nlist; i++) {
         for (size_t j = 0; j < sizes.size(); j++) {
@@ -121,7 +111,7 @@ void InvertedLists::print_stats () const {
     }
 }
 
+size_t InvertedLists::compute_ntotal() const {
     size_t tot = 0;
     for (size_t i = 0; i < nlist; i++) {
         tot += list_size(i);
@@ -133,195 +123,183 @@ size_t InvertedLists::compute_ntotal () const {
  * ArrayInvertedLists implementation
  ******************************************/
 
+ArrayInvertedLists::ArrayInvertedLists(size_t nlist, size_t code_size)
+        : InvertedLists(nlist, code_size) {
+    ids.resize(nlist);
+    codes.resize(nlist);
+}
+
+size_t ArrayInvertedLists::add_entries(
+        size_t list_no,
+        size_t n_entry,
+        const idx_t* ids_in,
+        const uint8_t* code) {
+    if (n_entry == 0)
+        return 0;
+    assert(list_no < nlist);
+    size_t o = ids[list_no].size();
+    ids[list_no].resize(o + n_entry);
+    memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
+    codes[list_no].resize((o + n_entry) * code_size);
+    memcpy(&codes[list_no][o * code_size], code, code_size * n_entry);
     return o;
 }
 
+size_t ArrayInvertedLists::list_size(size_t list_no) const {
+    assert(list_no < nlist);
     return ids[list_no].size();
 }
 
+const uint8_t* ArrayInvertedLists::get_codes(size_t list_no) const {
+    assert(list_no < nlist);
     return codes[list_no].data();
 }
 
+const InvertedLists::idx_t* ArrayInvertedLists::get_ids(size_t list_no) const {
+    assert(list_no < nlist);
     return ids[list_no].data();
 }
 
+void ArrayInvertedLists::resize(size_t list_no, size_t new_size) {
+    ids[list_no].resize(new_size);
+    codes[list_no].resize(new_size * code_size);
 }
 
+void ArrayInvertedLists::update_entries(
+        size_t list_no,
+        size_t offset,
+        size_t n_entry,
+        const idx_t* ids_in,
+        const uint8_t* codes_in) {
+    assert(list_no < nlist);
+    assert(n_entry + offset <= ids[list_no].size());
+    memcpy(&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
+    memcpy(&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
 }
 
+ArrayInvertedLists::~ArrayInvertedLists() {}
 
 /*****************************************************************
  * Meta-inverted list implementations
  *****************************************************************/
 
+size_t ReadOnlyInvertedLists::add_entries(
+        size_t,
+        size_t,
+        const idx_t*,
+        const uint8_t*) {
+    FAISS_THROW_MSG("not implemented");
 }
 
+void ReadOnlyInvertedLists::update_entries(
+        size_t,
+        size_t,
+        size_t,
+        const idx_t*,
+        const uint8_t*) {
+    FAISS_THROW_MSG("not implemented");
 }
 
+void ReadOnlyInvertedLists::resize(size_t, size_t) {
+    FAISS_THROW_MSG("not implemented");
 }
 
 /*****************************************
  * HStackInvertedLists implementation
  ******************************************/
 
+HStackInvertedLists::HStackInvertedLists(int nil, const InvertedLists** ils_in)
+        : ReadOnlyInvertedLists(
+                  nil > 0 ? ils_in[0]->nlist : 0,
+                  nil > 0 ? ils_in[0]->code_size : 0) {
+    FAISS_THROW_IF_NOT(nil > 0);
     for (int i = 0; i < nil; i++) {
+        ils.push_back(ils_in[i]);
+        FAISS_THROW_IF_NOT(
+                ils_in[i]->code_size == code_size && ils_in[i]->nlist == nlist);
     }
 }
 
+size_t HStackInvertedLists::list_size(size_t list_no) const {
     size_t sz = 0;
     for (int i = 0; i < ils.size(); i++) {
+        const InvertedLists* il = ils[i];
+        sz += il->list_size(list_no);
     }
     return sz;
 }
 
+const uint8_t* HStackInvertedLists::get_codes(size_t list_no) const {
+    uint8_t *codes = new uint8_t[code_size * list_size(list_no)], *c = codes;
 
     for (int i = 0; i < ils.size(); i++) {
+        const InvertedLists* il = ils[i];
         size_t sz = il->list_size(list_no) * code_size;
         if (sz > 0) {
+            memcpy(c, ScopedCodes(il, list_no).get(), sz);
             c += sz;
         }
     }
     return codes;
 }
 
+const uint8_t* HStackInvertedLists::get_single_code(
+        size_t list_no,
+        size_t offset) const {
     for (int i = 0; i < ils.size(); i++) {
+        const InvertedLists* il = ils[i];
+        size_t sz = il->list_size(list_no);
         if (offset < sz) {
             // here we have to copy the code, otherwise it will crash at dealloc
+            uint8_t* code = new uint8_t[code_size];
+            memcpy(code, ScopedCodes(il, list_no, offset).get(), code_size);
             return code;
         }
         offset -= sz;
     }
+    FAISS_THROW_FMT("offset %zd unknown", offset);
 }
 
+void HStackInvertedLists::release_codes(size_t, const uint8_t* codes) const {
+    delete[] codes;
 }
 
+const Index::idx_t* HStackInvertedLists::get_ids(size_t list_no) const {
+    idx_t *ids = new idx_t[list_size(list_no)], *c = ids;
 
     for (int i = 0; i < ils.size(); i++) {
+        const InvertedLists* il = ils[i];
         size_t sz = il->list_size(list_no);
         if (sz > 0) {
+            memcpy(c, ScopedIds(il, list_no).get(), sz * sizeof(idx_t));
             c += sz;
         }
     }
     return ids;
 }
 
+Index::idx_t HStackInvertedLists::get_single_id(size_t list_no, size_t offset)
+        const {
     for (int i = 0; i < ils.size(); i++) {
+        const InvertedLists* il = ils[i];
+        size_t sz = il->list_size(list_no);
         if (offset < sz) {
+            return il->get_single_id(list_no, offset);
         }
         offset -= sz;
     }
+    FAISS_THROW_FMT("offset %zd unknown", offset);
 }
 
+void HStackInvertedLists::release_ids(size_t, const idx_t* ids) const {
+    delete[] ids;
 }
 
+void HStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
+        const {
     for (int i = 0; i < ils.size(); i++) {
+        const InvertedLists* il = ils[i];
+        il->prefetch_lists(list_nos, nlist);
     }
 }
 
@@ -329,203 +307,184 @@ void HStackInvertedLists::prefetch_lists (const idx_t *list_nos, int nlist) const
  * SliceInvertedLists implementation
  ******************************************/
 
 namespace {
 
+using idx_t = InvertedLists::idx_t;
 
+idx_t translate_list_no(const SliceInvertedLists* sil, idx_t list_no) {
+    FAISS_THROW_IF_NOT(list_no >= 0 && list_no < sil->nlist);
+    return list_no + sil->i0;
+}
 
+}; // namespace
 
+SliceInvertedLists::SliceInvertedLists(
+        const InvertedLists* il,
+        idx_t i0,
+        idx_t i1)
+        : ReadOnlyInvertedLists(i1 - i0, il->code_size),
+          il(il),
+          i0(i0),
+          i1(i1) {}
 
+size_t SliceInvertedLists::list_size(size_t list_no) const {
+    return il->list_size(translate_list_no(this, list_no));
 }
 
+const uint8_t* SliceInvertedLists::get_codes(size_t list_no) const {
+    return il->get_codes(translate_list_no(this, list_no));
 }
 
+const uint8_t* SliceInvertedLists::get_single_code(
+        size_t list_no,
+        size_t offset) const {
+    return il->get_single_code(translate_list_no(this, list_no), offset);
 }
 
+void SliceInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
+        const {
+    return il->release_codes(translate_list_no(this, list_no), codes);
 }
 
+const Index::idx_t* SliceInvertedLists::get_ids(size_t list_no) const {
+    return il->get_ids(translate_list_no(this, list_no));
 }
 
+Index::idx_t SliceInvertedLists::get_single_id(size_t list_no, size_t offset)
+        const {
+    return il->get_single_id(translate_list_no(this, list_no), offset);
 }
 
+void SliceInvertedLists::release_ids(size_t list_no, const idx_t* ids) const {
+    return il->release_ids(translate_list_no(this, list_no), ids);
 }
 
+void SliceInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
+        const {
     std::vector<idx_t> translated_list_nos;
     for (int j = 0; j < nlist; j++) {
         idx_t list_no = list_nos[j];
+        if (list_no < 0)
+            continue;
+        translated_list_nos.push_back(translate_list_no(this, list_no));
     }
+    il->prefetch_lists(translated_list_nos.data(), translated_list_nos.size());
 }
 
 /*****************************************
  * VStackInvertedLists implementation
  ******************************************/
 
 namespace {
 
+using idx_t = InvertedLists::idx_t;
+
+// find the invlist this number belongs to
+int translate_list_no(const VStackInvertedLists* vil, idx_t list_no) {
+    FAISS_THROW_IF_NOT(list_no >= 0 && list_no < vil->nlist);
+    int i0 = 0, i1 = vil->ils.size();
+    const idx_t* cumsz = vil->cumsz.data();
+    while (i0 + 1 < i1) {
+        int imed = (i0 + i1) / 2;
+        if (list_no >= cumsz[imed]) {
+            i0 = imed;
+        } else {
+            i1 = imed;
         }
     }
+    assert(list_no >= cumsz[i0] && list_no < cumsz[i0 + 1]);
+    return i0;
+}
 
+idx_t sum_il_sizes(int nil, const InvertedLists** ils_in) {
+    idx_t tot = 0;
+    for (int i = 0; i < nil; i++) {
+        tot += ils_in[i]->nlist;
     }
+    return tot;
+}
 
+}; // namespace
 
+VStackInvertedLists::VStackInvertedLists(int nil, const InvertedLists** ils_in)
+        : ReadOnlyInvertedLists(
+                  sum_il_sizes(nil, ils_in),
+                  nil > 0 ? ils_in[0]->code_size : 0) {
+    FAISS_THROW_IF_NOT(nil > 0);
+    cumsz.resize(nil + 1);
     for (int i = 0; i < nil; i++) {
+        ils.push_back(ils_in[i]);
+        FAISS_THROW_IF_NOT(ils_in[i]->code_size == code_size);
         cumsz[i + 1] = cumsz[i] + ils_in[i]->nlist;
     }
 }
 
+size_t VStackInvertedLists::list_size(size_t list_no) const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->list_size(list_no);
 }
 
+const uint8_t* VStackInvertedLists::get_codes(size_t list_no) const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->get_codes(list_no);
 }
 
+const uint8_t* VStackInvertedLists::get_single_code(
+        size_t list_no,
+        size_t offset) const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->get_single_code(list_no, offset);
 }
 
+void VStackInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
+        const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->release_codes(list_no, codes);
 }
 
+const Index::idx_t* VStackInvertedLists::get_ids(size_t list_no) const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->get_ids(list_no);
 }
 
+Index::idx_t VStackInvertedLists::get_single_id(size_t list_no, size_t offset)
+        const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->get_single_id(list_no, offset);
 }
 
+void VStackInvertedLists::release_ids(size_t list_no, const idx_t* ids) const {
+    int i = translate_list_no(this, list_no);
     list_no -= cumsz[i];
+    return ils[i]->release_ids(list_no, ids);
 }
 
+void VStackInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
+        const {
+    std::vector<int> ilno(nlist, -1);
+    std::vector<int> n_per_il(ils.size(), 0);
     for (int j = 0; j < nlist; j++) {
         idx_t list_no = list_nos[j];
+        if (list_no < 0)
+            continue;
+        int i = ilno[j] = translate_list_no(this, list_no);
         n_per_il[i]++;
     }
+    std::vector<int> cum_n_per_il(ils.size() + 1, 0);
     for (int j = 0; j < ils.size(); j++) {
         cum_n_per_il[j + 1] = cum_n_per_il[j] + n_per_il[j];
     }
+    std::vector<idx_t> sorted_list_nos(cum_n_per_il.back());
     for (int j = 0; j < nlist; j++) {
         idx_t list_no = list_nos[j];
+        if (list_no < 0)
+            continue;
         int i = ilno[j];
         list_no -= cumsz[i];
         sorted_list_nos[cum_n_per_il[i]++] = list_no;
@@ -535,158 +494,142 @@ void VStackInvertedLists::prefetch_lists (
     for (int j = 0; j < ils.size(); j++) {
         int i1 = i0 + n_per_il[j];
         if (i1 > i0) {
+            ils[j]->prefetch_lists(sorted_list_nos.data() + i0, i1 - i0);
         }
         i0 = i1;
     }
 }
 
 /*****************************************
  * MaskedInvertedLists implementation
  ******************************************/
 
+MaskedInvertedLists::MaskedInvertedLists(
+        const InvertedLists* il0,
+        const InvertedLists* il1)
+        : ReadOnlyInvertedLists(il0->nlist, il0->code_size),
+          il0(il0),
+          il1(il1) {
+    FAISS_THROW_IF_NOT(il1->nlist == nlist);
+    FAISS_THROW_IF_NOT(il1->code_size == code_size);
 }
 
+size_t MaskedInvertedLists::list_size(size_t list_no) const {
     size_t sz = il0->list_size(list_no);
     return sz ? sz : il1->list_size(list_no);
 }
 
+const uint8_t* MaskedInvertedLists::get_codes(size_t list_no) const {
     size_t sz = il0->list_size(list_no);
     return (sz ? il0 : il1)->get_codes(list_no);
 }
 
+const idx_t* MaskedInvertedLists::get_ids(size_t list_no) const {
+    size_t sz = il0->list_size(list_no);
+    return (sz ? il0 : il1)->get_ids(list_no);
 }
 
+void MaskedInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
+        const {
+    size_t sz = il0->list_size(list_no);
+    (sz ? il0 : il1)->release_codes(list_no, codes);
 }
 
+void MaskedInvertedLists::release_ids(size_t list_no, const idx_t* ids) const {
+    size_t sz = il0->list_size(list_no);
+    (sz ? il0 : il1)->release_ids(list_no, ids);
 }
 
+idx_t MaskedInvertedLists::get_single_id(size_t list_no, size_t offset) const {
+    size_t sz = il0->list_size(list_no);
+    return (sz ? il0 : il1)->get_single_id(list_no, offset);
 }
 
+const uint8_t* MaskedInvertedLists::get_single_code(
+        size_t list_no,
+        size_t offset) const {
+    size_t sz = il0->list_size(list_no);
+    return (sz ? il0 : il1)->get_single_code(list_no, offset);
 }
 
+void MaskedInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
+        const {
     std::vector<idx_t> list0, list1;
     for (int i = 0; i < nlist; i++) {
         idx_t list_no = list_nos[i];
+        if (list_no < 0)
+            continue;
         size_t sz = il0->list_size(list_no);
+        (sz ? list0 : list1).push_back(list_no);
     }
+    il0->prefetch_lists(list0.data(), list0.size());
+    il1->prefetch_lists(list1.data(), list1.size());
 }
 
 /*****************************************
  * MaskedInvertedLists implementation
  ******************************************/
 
+StopWordsInvertedLists::StopWordsInvertedLists(
+        const InvertedLists* il0,
+        size_t maxsize)
+        : ReadOnlyInvertedLists(il0->nlist, il0->code_size),
+          il0(il0),
+          maxsize(maxsize) {}
 
+size_t StopWordsInvertedLists::list_size(size_t list_no) const {
     size_t sz = il0->list_size(list_no);
     return sz < maxsize ? sz : 0;
 }
 
+const uint8_t* StopWordsInvertedLists::get_codes(size_t list_no) const {
+    return il0->list_size(list_no) < maxsize ? il0->get_codes(list_no)
+                                             : nullptr;
 }
 
+const idx_t* StopWordsInvertedLists::get_ids(size_t list_no) const {
     return il0->list_size(list_no) < maxsize ? il0->get_ids(list_no) : nullptr;
 }
 
+void StopWordsInvertedLists::release_codes(size_t list_no, const uint8_t* codes)
+        const {
+    if (il0->list_size(list_no) < maxsize) {
+        il0->release_codes(list_no, codes);
     }
 }
 
+void StopWordsInvertedLists::release_ids(size_t list_no, const idx_t* ids)
+        const {
+    if (il0->list_size(list_no) < maxsize) {
+        il0->release_ids(list_no, ids);
     }
 }
 
+idx_t StopWordsInvertedLists::get_single_id(size_t list_no, size_t offset)
+        const {
+    FAISS_THROW_IF_NOT(il0->list_size(list_no) < maxsize);
+    return il0->get_single_id(list_no, offset);
 }
 
+const uint8_t* StopWordsInvertedLists::get_single_code(
+        size_t list_no,
+        size_t offset) const {
+    FAISS_THROW_IF_NOT(il0->list_size(list_no) < maxsize);
+    return il0->get_single_code(list_no, offset);
 }
 
+void StopWordsInvertedLists::prefetch_lists(const idx_t* list_nos, int nlist)
+        const {
     std::vector<idx_t> list0;
     for (int i = 0; i < nlist; i++) {
         idx_t list_no = list_nos[i];
+        if (list_no < 0)
+            continue;
         if (il0->list_size(list_no) < maxsize) {
             list0.push_back(list_no);
         }
     }
+    il0->prefetch_lists(list0.data(), list0.size());
 }
 
 } // namespace faiss
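
For reference, here is a minimal C++ sketch of the InvertedLists calls that appear in the hunks above (ArrayInvertedLists construction plus add_entry, list_size, compute_ntotal and imbalance_factor). The list count, code size and id are hypothetical, and it assumes the vendored faiss headers and library are available; it is an illustration, not code from the gem:

#include <faiss/invlists/InvertedLists.h>

#include <cstdio>
#include <vector>

int main() {
    // 4 inverted lists, 8 bytes of code per stored entry (hypothetical sizes)
    faiss::ArrayInvertedLists invlists(4, 8);

    std::vector<uint8_t> code(8, 0x2a);
    faiss::InvertedLists::idx_t id = 42;

    // append one (id, code) pair to list 0; returns the offset it was stored at
    size_t offset = invlists.add_entry(0, id, code.data());

    printf("stored at offset %zu\n", offset);
    printf("list 0 size: %zu\n", invlists.list_size(0));
    printf("total entries: %zu\n", invlists.compute_ntotal());
    printf("imbalance factor: %g\n", invlists.imbalance_factor());
    return 0;
}
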