faiss 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/Clustering.h
CHANGED
@@ -26,7 +26,7 @@ struct ClusteringParameters {
|
|
26
26
|
bool verbose;
|
27
27
|
bool spherical; ///< do we want normalized centroids?
|
28
28
|
bool int_centroids; ///< round centroids coordinates to integer
|
29
|
-
bool update_index; ///<
|
29
|
+
bool update_index; ///< re-train index after each iteration?
|
30
30
|
bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations
|
31
31
|
|
32
32
|
int min_points_per_centroid; ///< otherwise you get a warning
|
@@ -34,12 +34,23 @@ struct ClusteringParameters {
|
|
34
34
|
|
35
35
|
int seed; ///< seed for the random number generator
|
36
36
|
|
37
|
+
size_t decode_block_size; ///< how many vectors at a time to decode
|
38
|
+
|
37
39
|
/// sets reasonable defaults
|
38
40
|
ClusteringParameters ();
|
39
41
|
};
|
40
42
|
|
41
43
|
|
42
|
-
|
44
|
+
/// Statistics collected for one iteration of clustering.
struct ClusteringIterationStats {
    /// objective value (sum of distances reported by index)
    float obj;
    /// seconds for iteration
    double time;
    /// seconds for just search
    double time_search;
    /// imbalance factor of iteration
    double imbalance_factor;
    /// number of cluster splits
    int nsplit;
};
|
51
|
+
|
52
|
+
|
53
|
+
/** K-means clustering based on assignment - centroid update iterations
|
43
54
|
*
|
44
55
|
* The clustering is based on an Index object that assigns training
|
45
56
|
* points to the centroids. Therefore, at each iteration the centroids
|
@@ -50,27 +61,44 @@ struct ClusteringParameters {
|
|
50
61
|
* centroids table it is not empty on input, it is also used for
|
51
62
|
* initialization.
|
52
63
|
*
|
53
|
-
* To do several clusterings, just call train() several times on
|
54
|
-
* different training sets, clearing the centroid table in between.
|
55
64
|
*/
|
56
65
|
struct Clustering: ClusteringParameters {
|
57
66
|
typedef Index::idx_t idx_t;
|
58
67
|
size_t d; ///< dimension of the vectors
|
59
68
|
size_t k; ///< nb of centroids
|
60
69
|
|
61
|
-
|
70
|
+
/** centroids (k * d)
|
71
|
+
* if centroids are set on input to train, they will be used as initialization
|
72
|
+
*/
|
62
73
|
std::vector<float> centroids;
|
63
74
|
|
64
|
-
///
|
65
|
-
|
66
|
-
std::vector<float> obj;
|
75
|
+
/// stats at every iteration of clustering
|
76
|
+
std::vector<ClusteringIterationStats> iteration_stats;
|
67
77
|
|
68
|
-
/// the only mandatory parameters are k and d
|
69
78
|
Clustering (int d, int k);
|
70
79
|
Clustering (int d, int k, const ClusteringParameters &cp);
|
71
80
|
|
72
|
-
|
73
|
-
|
81
|
+
/** run k-means training
|
82
|
+
*
|
83
|
+
* @param x training vectors, size n * d
|
84
|
+
* @param index index used for assignment
|
85
|
+
* @param x_weights weight associated to each vector: NULL or size n
|
86
|
+
*/
|
87
|
+
virtual void train (idx_t n, const float * x, faiss::Index & index,
|
88
|
+
const float *x_weights = nullptr);
|
89
|
+
|
90
|
+
|
91
|
+
/** run with encoded vectors
|
92
|
+
*
|
93
|
+
* in addition to train()'s parameters, takes a codec as parameter
|
94
|
+
* to decode the input vectors.
|
95
|
+
*
|
96
|
+
* @param codec codec used to decode the vectors (nullptr =
|
97
|
+
* vectors are in fact floats)
|
98
|
+
*/
|
99
|
+
void train_encoded (idx_t nx, const uint8_t *x_in,
|
100
|
+
const Index * codec, Index & index,
|
101
|
+
const float *weights = nullptr);
|
74
102
|
|
75
103
|
/// Post-process the centroids after each centroid update.
|
76
104
|
/// includes optional L2 normalization and nearest integer rounding
|
@@ -0,0 +1,267 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#include <faiss/DirectMap.h>
|
11
|
+
|
12
|
+
#include <cstdio>
|
13
|
+
#include <cassert>
|
14
|
+
|
15
|
+
#include <faiss/impl/FaissAssert.h>
|
16
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
17
|
+
|
18
|
+
namespace faiss {
|
19
|
+
|
20
|
+
/// A freshly constructed DirectMap has no map (type NoMap) and empty containers.
DirectMap::DirectMap()
{
    type = NoMap;
}
|
22
|
+
|
23
|
+
/**
 * Switch the direct map to `new_type` and rebuild it from the inverted lists.
 *
 * @param new_type  NoMap, Array or Hashtable; any other value throws
 * @param invlists  inverted lists scanned to populate the map; only
 *                  dereferenced when new_type is Array or Hashtable
 * @param ntotal    size of the Array map, or reservation hint for the
 *                  Hashtable
 *
 * Requesting the type that is already active is a no-op. Otherwise both
 * containers are cleared before the new map is built. For an Array map
 * every id found in the lists must lie in [0, ntotal), else an exception
 * is thrown.
 */
void DirectMap::set_type (Type new_type, const InvertedLists *invlists, size_t ntotal) {

    FAISS_THROW_IF_NOT (new_type == NoMap || new_type == Array ||
                        new_type == Hashtable);

    if (new_type == type) {
        // nothing to do
        return;
    }

    array.clear ();
    hashtable.clear ();
    type = new_type;

    if (new_type == NoMap) {
        return;
    } else if (new_type == Array) {
        // -1 marks ids that have no entry (yet)
        array.resize (ntotal, -1);
    } else if (new_type == Hashtable) {
        hashtable.reserve (ntotal);
    }

    for (size_t key = 0; key < invlists->nlist; key++) {
        const size_t list_size = invlists->list_size (key);
        InvertedLists::ScopedIds idlist (invlists, key);

        if (new_type == Array) {
            // size_t offset: list_size is unsigned, so a signed index would
            // be a signed/unsigned comparison (and `long` may be 32 bits)
            for (size_t ofs = 0; ofs < list_size; ofs++) {
                FAISS_THROW_IF_NOT_MSG (
                    0 <= idlist [ofs] && idlist[ofs] < ntotal,
                    "direct map supported only for sequential ids");
                array [idlist [ofs]] = lo_build(key, ofs);
            }
        } else if (new_type == Hashtable) {
            for (size_t ofs = 0; ofs < list_size; ofs++) {
                hashtable [idlist [ofs]] = lo_build(key, ofs);
            }
        }
    }
}
|
63
|
+
|
64
|
+
void DirectMap::clear()
|
65
|
+
{
|
66
|
+
array.clear ();
|
67
|
+
hashtable.clear ();
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
/**
 * Look up the LO-encoded (list number, offset) entry stored for `key`.
 *
 * Throws when the key is out of range or has a negative entry (Array),
 * when the key is absent (Hashtable), or when no map is active.
 */
DirectMap::idx_t DirectMap::get (idx_t key) const
{
    switch (type) {
    case Array: {
        FAISS_THROW_IF_NOT_MSG (
            key >= 0 && key < array.size(), "invalid key"
        );
        idx_t lo = array[key];
        // negative entries mark ids without a stored location
        FAISS_THROW_IF_NOT_MSG(lo >= 0, "-1 entry in direct_map");
        return lo;
    }
    case Hashtable: {
        auto res = hashtable.find (key);
        FAISS_THROW_IF_NOT_MSG (res != hashtable.end(), "key not found");
        return res->second;
    }
    default:
        FAISS_THROW_MSG ("direct map not initialized");
    }
}
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
/**
 * Record the location of one newly added id.
 *
 * Array: the id must be the next sequential one (asserted); the entry is
 * appended, or -1 is appended when list_no is negative.
 * Hashtable: an entry is stored only when list_no is non-negative.
 * Any other type: nothing happens.
 */
void DirectMap::add_single_id (idx_t id, idx_t list_no, size_t offset)
{
    const bool has_list = list_no >= 0;

    if (type == Hashtable) {
        if (has_list) {
            hashtable[id] = lo_build (list_no, offset);
        }
        return;
    }

    if (type != Array) {
        return;
    }

    assert (id == array.size());
    if (has_list) {
        array.push_back (lo_build (list_no, offset));
    } else {
        array.push_back (-1);
    }
}
|
109
|
+
|
110
|
+
void DirectMap::check_can_add (const idx_t *ids) {
|
111
|
+
if (type == Array && ids) {
|
112
|
+
FAISS_THROW_MSG ("cannot have array direct map and add with ids");
|
113
|
+
}
|
114
|
+
}
|
115
|
+
|
116
|
+
/********************* DirectMapAdd implementation */
|
117
|
+
|
118
|
+
|
119
|
+
/**
 * Prepare a batch addition of `n` vectors to `direct_map`.
 *
 * @param direct_map  map to update
 * @param n           number of vectors in the batch
 * @param xids        ids of the vectors; must be null for an Array map
 *                    and non-null for a Hashtable map
 *
 * Array: the array is grown by n slots filled with -1, and the previous
 * size is kept in `ntotal`.
 * Hashtable: entries are staged in `all_ofs` (the destructor copies them
 * into the hashtable).
 */
DirectMapAdd::DirectMapAdd (DirectMap &direct_map, size_t n, const idx_t *xids):
    // ntotal is given a value for every map type: the destructor may read it
    direct_map(direct_map), type(direct_map.type), ntotal(0), n(n), xids(xids)
{
    if (type == DirectMap::Array) {
        FAISS_THROW_IF_NOT (xids == nullptr);
        ntotal = direct_map.array.size();
        direct_map.array.resize (ntotal + n, -1);
    } else if (type == DirectMap::Hashtable) {
        // without explicit ids the hashtable keys would have to be derived
        // from ntotal, which is not known for this map type
        FAISS_THROW_IF_NOT_MSG (
            xids, "hashtable direct map requires explicit ids");
        // can't parallel update hashtable so use temp array
        all_ofs.resize (n, -1);
    }
}
|
131
|
+
|
132
|
+
|
133
|
+
void DirectMapAdd::add (size_t i, idx_t list_no, size_t ofs)
|
134
|
+
{
|
135
|
+
if (type == DirectMap::Array) {
|
136
|
+
direct_map.array [ntotal + i] = lo_build (list_no, ofs);
|
137
|
+
} else if (type == DirectMap::Hashtable) {
|
138
|
+
all_ofs [i] = lo_build (list_no, ofs);
|
139
|
+
}
|
140
|
+
}
|
141
|
+
|
142
|
+
/**
 * For Hashtable maps, copy the staged entries of the batch into the
 * hashtable. The key of entry i is xids[i], or ntotal + i when xids is
 * null. Other map types need no work here.
 */
DirectMapAdd::~DirectMapAdd ()
{
    if (type != DirectMap::Hashtable) {
        return;
    }
    // size_t index: n is a size_t, an int counter would be compared
    // signed/unsigned and could overflow for very large batches
    for (size_t i = 0; i < n; i++) {
        const idx_t id = xids ? xids[i] : static_cast<idx_t> (ntotal + i);
        direct_map.hashtable [id] = all_ofs [i];
    }
}
|
151
|
+
|
152
|
+
/********************************************************/
|
153
|
+
|
154
|
+
using ScopedCodes = InvertedLists::ScopedCodes;
|
155
|
+
using ScopedIds = InvertedLists::ScopedIds;
|
156
|
+
|
157
|
+
|
158
|
+
size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
|
159
|
+
{
|
160
|
+
size_t nlist = invlists->nlist;
|
161
|
+
std::vector<idx_t> toremove(nlist);
|
162
|
+
|
163
|
+
size_t nremove = 0;
|
164
|
+
|
165
|
+
if (type == NoMap) {
|
166
|
+
// exhaustive scan of IVF
|
167
|
+
#pragma omp parallel for
|
168
|
+
for (idx_t i = 0; i < nlist; i++) {
|
169
|
+
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
|
170
|
+
ScopedIds idsi (invlists, i);
|
171
|
+
while (j < l) {
|
172
|
+
if (sel.is_member (idsi[j])) {
|
173
|
+
l--;
|
174
|
+
invlists->update_entry (
|
175
|
+
i, j,
|
176
|
+
invlists->get_single_id (i, l),
|
177
|
+
ScopedCodes (invlists, i, l).get()
|
178
|
+
);
|
179
|
+
} else {
|
180
|
+
j++;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
toremove[i] = l0 - l;
|
184
|
+
}
|
185
|
+
// this will not run well in parallel on ondisk because of
|
186
|
+
// possible shrinks
|
187
|
+
for (idx_t i = 0; i < nlist; i++) {
|
188
|
+
if (toremove[i] > 0) {
|
189
|
+
nremove += toremove[i];
|
190
|
+
invlists->resize(i, invlists->list_size(i) - toremove[i]);
|
191
|
+
}
|
192
|
+
}
|
193
|
+
} else if (type == Hashtable) {
|
194
|
+
const IDSelectorArray *sela =
|
195
|
+
dynamic_cast<const IDSelectorArray*>(&sel);
|
196
|
+
FAISS_THROW_IF_NOT_MSG (
|
197
|
+
sela,
|
198
|
+
"remove with hashtable works only with IDSelectorArray"
|
199
|
+
);
|
200
|
+
|
201
|
+
for (idx_t i = 0; i < sela->n; i++) {
|
202
|
+
idx_t id = sela->ids[i];
|
203
|
+
auto res = hashtable.find (id);
|
204
|
+
if (res != hashtable.end()) {
|
205
|
+
size_t list_no = lo_listno (res->second);
|
206
|
+
size_t offset = lo_offset (res->second);
|
207
|
+
idx_t last = invlists->list_size (list_no) - 1;
|
208
|
+
hashtable.erase (res);
|
209
|
+
if (offset < last) {
|
210
|
+
idx_t last_id = invlists->get_single_id (list_no, last);
|
211
|
+
invlists->update_entry (
|
212
|
+
list_no, offset,
|
213
|
+
last_id,
|
214
|
+
ScopedCodes (invlists, list_no, last).get()
|
215
|
+
);
|
216
|
+
// update hash entry for last element
|
217
|
+
hashtable [last_id] = list_no << 32 | offset;
|
218
|
+
}
|
219
|
+
invlists->resize(list_no, last);
|
220
|
+
nremove++;
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
} else {
|
225
|
+
FAISS_THROW_MSG("remove not supported with this direct_map format");
|
226
|
+
}
|
227
|
+
return nremove;
|
228
|
+
}
|
229
|
+
|
230
|
+
/**
 * Replace the stored codes of `n` existing ids, possibly moving them to
 * other inverted lists. Only supported for Array direct maps (throws
 * otherwise).
 *
 * @param invlists  inverted lists to modify
 * @param n         number of entries to update
 * @param ids       ids to update, size n; each must be a valid index of
 *                  the array and have a non-negative entry
 * @param assign    destination list number for each id, size n
 * @param codes     new codes, n * invlists->code_size bytes
 */
void DirectMap::update_codes (InvertedLists *invlists,
                              int n, const idx_t *ids,
                              const idx_t *assign,
                              const uint8_t *codes)
{
    FAISS_THROW_IF_NOT (type == Array);

    size_t code_size = invlists->code_size;

    // int index to match n: with an unsigned index a negative n would be
    // converted to a huge iteration count
    for (int i = 0; i < n; i++) {
        idx_t id = ids[i];
        FAISS_THROW_IF_NOT_MSG (0 <= id && id < array.size(),
                                "id to update out of range");
        { // remove old one
            idx_t dm = array [id];
            // a negative entry has no valid list number / offset to decode
            FAISS_THROW_IF_NOT_MSG (dm >= 0,
                                    "id to update has no entry in direct_map");
            int64_t ofs = lo_offset (dm);
            int64_t il = lo_listno (dm);
            size_t l = invlists->list_size (il);
            if (ofs != l - 1) { // move l - 1 to ofs
                int64_t id2 = invlists->get_single_id (il, l - 1);
                array[id2] = lo_build (il, ofs);
                // scoped access to the code of the last entry, alive until
                // the end of the update_entry call
                invlists->update_entry (il, ofs, id2,
                                        ScopedCodes (invlists, il, l - 1).get());
            }
            invlists->resize (il, l - 1);
        }
        { // insert new one
            int64_t il = assign[i];
            size_t l = invlists->list_size (il);
            idx_t dm = lo_build (il, l);
            array [id] = dm;
            invlists->add_entry (il, id,
                                 codes + static_cast<size_t> (i) * code_size);
        }
    }
}
|
265
|
+
|
266
|
+
|
267
|
+
}
|
@@ -0,0 +1,120 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#ifndef FAISS_DIRECT_MAP_H
|
11
|
+
#define FAISS_DIRECT_MAP_H
|
12
|
+
|
13
|
+
#include <faiss/InvertedLists.h>
|
14
|
+
#include <unordered_map>
|
15
|
+
|
16
|
+
|
17
|
+
namespace faiss {
|
18
|
+
|
19
|
+
// When a list id + offset pair is encoded in a single uint64
|
20
|
+
// we call this LO = list-offset
|
21
|
+
|
22
|
+
/// Pack a list id (shifted into the upper 32 bits) and an offset into one
/// LO value.
inline uint64_t lo_build (uint64_t list_id, uint64_t offset) {
    const uint64_t shifted_list = list_id << 32;
    return shifted_list | offset;
}
|
25
|
+
|
26
|
+
/// Extract the list number (upper 32 bits) from an LO value.
inline uint64_t lo_listno (uint64_t lo) {
    const uint64_t list_no = lo >> 32;
    return list_no;
}
|
29
|
+
|
30
|
+
/// Extract the offset (lower 32 bits) from an LO value.
inline uint64_t lo_offset (uint64_t lo) {
    const uint64_t low_mask = (uint64_t (1) << 32) - 1;
    return lo & low_mask;
}
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Direct map: a way to map back from ids to inverted lists
|
36
|
+
*/
|
37
|
+
struct DirectMap {
|
38
|
+
typedef Index::idx_t idx_t;
|
39
|
+
|
40
|
+
enum Type {
|
41
|
+
NoMap = 0, // default
|
42
|
+
Array = 1, // sequential ids (only for add, no add_with_ids)
|
43
|
+
Hashtable = 2 // arbitrary ids
|
44
|
+
};
|
45
|
+
Type type;
|
46
|
+
|
47
|
+
/// map for direct access to the elements. Map ids to LO-encoded entries.
|
48
|
+
std::vector <idx_t> array;
|
49
|
+
std::unordered_map <idx_t, idx_t> hashtable;
|
50
|
+
|
51
|
+
DirectMap();
|
52
|
+
|
53
|
+
/// set type and initialize
|
54
|
+
void set_type (Type new_type, const InvertedLists *invlists, size_t ntotal);
|
55
|
+
|
56
|
+
/// get an entry
|
57
|
+
idx_t get (idx_t id) const;
|
58
|
+
|
59
|
+
/// for quick checks
|
60
|
+
bool no () const {return type == NoMap; }
|
61
|
+
|
62
|
+
/**
|
63
|
+
* update the direct_map
|
64
|
+
*/
|
65
|
+
|
66
|
+
/// throw if Array and ids is not NULL
|
67
|
+
void check_can_add (const idx_t *ids);
|
68
|
+
|
69
|
+
/// non thread-safe version
|
70
|
+
void add_single_id (idx_t id, idx_t list_no, size_t offset);
|
71
|
+
|
72
|
+
/// remove all entries
|
73
|
+
void clear();
|
74
|
+
|
75
|
+
/**
|
76
|
+
* operations on inverted lists that require translation with a DirectMap
|
77
|
+
*/
|
78
|
+
|
79
|
+
/// remove ids from the InvertedLists, possibly using the direct map
|
80
|
+
size_t remove_ids(const IDSelector& sel, InvertedLists *invlists);
|
81
|
+
|
82
|
+
/// update entries, using the direct map
|
83
|
+
void update_codes (InvertedLists *invlists,
|
84
|
+
int n, const idx_t *ids,
|
85
|
+
const idx_t *list_nos,
|
86
|
+
const uint8_t *codes);
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
};
|
91
|
+
|
92
|
+
/// Thread-safe way of updating the direct_map
|
93
|
+
struct DirectMapAdd {
|
94
|
+
|
95
|
+
typedef Index::idx_t idx_t;
|
96
|
+
|
97
|
+
using Type = DirectMap::Type;
|
98
|
+
|
99
|
+
DirectMap &direct_map;
|
100
|
+
DirectMap::Type type;
|
101
|
+
size_t ntotal;
|
102
|
+
size_t n;
|
103
|
+
const idx_t *xids;
|
104
|
+
|
105
|
+
std::vector<idx_t> all_ofs;
|
106
|
+
|
107
|
+
DirectMapAdd (DirectMap &direct_map, size_t n, const idx_t *xids);
|
108
|
+
|
109
|
+
/// add vector i (with id xids[i]) at list_no and offset
|
110
|
+
void add (size_t i, idx_t list_no, size_t offset);
|
111
|
+
|
112
|
+
~DirectMapAdd ();
|
113
|
+
};
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
}
|
118
|
+
|
119
|
+
|
120
|
+
#endif
|