faiss 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +18 -18
- data/README.md +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/Clustering.cpp +318 -53
- data/vendor/faiss/Clustering.h +39 -11
- data/vendor/faiss/DirectMap.cpp +267 -0
- data/vendor/faiss/DirectMap.h +120 -0
- data/vendor/faiss/IVFlib.cpp +24 -4
- data/vendor/faiss/IVFlib.h +4 -0
- data/vendor/faiss/Index.h +5 -24
- data/vendor/faiss/Index2Layer.cpp +0 -1
- data/vendor/faiss/IndexBinary.h +7 -3
- data/vendor/faiss/IndexBinaryFlat.cpp +5 -0
- data/vendor/faiss/IndexBinaryFlat.h +3 -0
- data/vendor/faiss/IndexBinaryHash.cpp +492 -0
- data/vendor/faiss/IndexBinaryHash.h +116 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +160 -107
- data/vendor/faiss/IndexBinaryIVF.h +14 -4
- data/vendor/faiss/IndexFlat.h +2 -1
- data/vendor/faiss/IndexHNSW.cpp +68 -16
- data/vendor/faiss/IndexHNSW.h +3 -3
- data/vendor/faiss/IndexIVF.cpp +72 -76
- data/vendor/faiss/IndexIVF.h +24 -5
- data/vendor/faiss/IndexIVFFlat.cpp +19 -54
- data/vendor/faiss/IndexIVFFlat.h +1 -11
- data/vendor/faiss/IndexIVFPQ.cpp +49 -26
- data/vendor/faiss/IndexIVFPQ.h +9 -10
- data/vendor/faiss/IndexIVFPQR.cpp +2 -2
- data/vendor/faiss/IndexIVFSpectralHash.cpp +2 -2
- data/vendor/faiss/IndexLSH.h +4 -1
- data/vendor/faiss/IndexPreTransform.cpp +0 -1
- data/vendor/faiss/IndexScalarQuantizer.cpp +8 -1
- data/vendor/faiss/InvertedLists.cpp +0 -2
- data/vendor/faiss/MetaIndexes.cpp +0 -1
- data/vendor/faiss/MetricType.h +36 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +13 -7
- data/vendor/faiss/c_api/Clustering_c.h +11 -5
- data/vendor/faiss/c_api/IndexIVF_c.cpp +7 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +7 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +21 -0
- data/vendor/faiss/c_api/IndexPreTransform_c.h +32 -0
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +185 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +4 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +1 -1
- data/vendor/faiss/gpu/GpuDistance.h +93 -0
- data/vendor/faiss/gpu/GpuIndex.h +7 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +0 -10
- data/vendor/faiss/gpu/GpuIndexIVF.h +1 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +8 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +49 -27
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +110 -2
- data/vendor/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +17 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +14 -3
- data/vendor/faiss/impl/HNSW.cpp +0 -1
- data/vendor/faiss/impl/PolysemousTraining.h +5 -5
- data/vendor/faiss/impl/ProductQuantizer-inl.h +138 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +1 -113
- data/vendor/faiss/impl/ProductQuantizer.h +42 -47
- data/vendor/faiss/impl/index_read.cpp +103 -7
- data/vendor/faiss/impl/index_write.cpp +101 -5
- data/vendor/faiss/impl/io.cpp +111 -1
- data/vendor/faiss/impl/io.h +38 -0
- data/vendor/faiss/index_factory.cpp +0 -1
- data/vendor/faiss/tests/test_merge.cpp +0 -1
- data/vendor/faiss/tests/test_pq_encoding.cpp +6 -6
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +1 -0
- data/vendor/faiss/utils/distances.cpp +4 -5
- data/vendor/faiss/utils/distances_simd.cpp +0 -1
- data/vendor/faiss/utils/hamming.cpp +85 -3
- data/vendor/faiss/utils/hamming.h +20 -0
- data/vendor/faiss/utils/utils.cpp +0 -96
- data/vendor/faiss/utils/utils.h +0 -15
- metadata +11 -3
- data/lib/faiss/ext.bundle +0 -0
data/vendor/faiss/Clustering.h
CHANGED
@@ -26,7 +26,7 @@ struct ClusteringParameters {
|
|
26
26
|
bool verbose;
|
27
27
|
bool spherical; ///< do we want normalized centroids?
|
28
28
|
bool int_centroids; ///< round centroids coordinates to integer
|
29
|
-
bool update_index; ///<
|
29
|
+
bool update_index; ///< re-train index after each iteration?
|
30
30
|
bool frozen_centroids; ///< use the centroids provided as input and do not change them during iterations
|
31
31
|
|
32
32
|
int min_points_per_centroid; ///< otherwise you get a warning
|
@@ -34,12 +34,23 @@ struct ClusteringParameters {
|
|
34
34
|
|
35
35
|
int seed; ///< seed for the random number generator
|
36
36
|
|
37
|
+
size_t decode_block_size; ///< how many vectors at a time to decode
|
38
|
+
|
37
39
|
/// sets reasonable defaults
|
38
40
|
ClusteringParameters ();
|
39
41
|
};
|
40
42
|
|
41
43
|
|
42
|
-
|
44
|
+
/// Statistics recorded for one iteration of the clustering loop.
/// Plain aggregate: the members are not default-initialized.
struct ClusteringIterationStats {
    /// objective value (sum of distances reported by index)
    float obj;
    /// seconds for iteration
    double time;
    /// seconds for just search
    double time_search;
    /// imbalance factor of iteration
    double imbalance_factor;
    /// number of cluster splits
    int nsplit;
};
|
51
|
+
|
52
|
+
|
53
|
+
/** K-means clustering based on assignment - centroid update iterations
|
43
54
|
*
|
44
55
|
* The clustering is based on an Index object that assigns training
|
45
56
|
* points to the centroids. Therefore, at each iteration the centroids
|
@@ -50,27 +61,44 @@ struct ClusteringParameters {
|
|
50
61
|
* centroids table it is not empty on input, it is also used for
|
51
62
|
* initialization.
|
52
63
|
*
|
53
|
-
* To do several clusterings, just call train() several times on
|
54
|
-
* different training sets, clearing the centroid table in between.
|
55
64
|
*/
|
56
65
|
struct Clustering: ClusteringParameters {
|
57
66
|
typedef Index::idx_t idx_t;
|
58
67
|
size_t d; ///< dimension of the vectors
|
59
68
|
size_t k; ///< nb of centroids
|
60
69
|
|
61
|
-
|
70
|
+
/** centroids (k * d)
|
71
|
+
* if centroids are set on input to train, they will be used as initialization
|
72
|
+
*/
|
62
73
|
std::vector<float> centroids;
|
63
74
|
|
64
|
-
///
|
65
|
-
|
66
|
-
std::vector<float> obj;
|
75
|
+
/// stats at every iteration of clustering
|
76
|
+
std::vector<ClusteringIterationStats> iteration_stats;
|
67
77
|
|
68
|
-
/// the only mandatory parameters are k and d
|
69
78
|
Clustering (int d, int k);
|
70
79
|
Clustering (int d, int k, const ClusteringParameters &cp);
|
71
80
|
|
72
|
-
|
73
|
-
|
81
|
+
/** run k-means training
|
82
|
+
*
|
83
|
+
* @param x training vectors, size n * d
|
84
|
+
* @param index index used for assignment
|
85
|
+
* @param x_weights weight associated to each vector: NULL or size n
|
86
|
+
*/
|
87
|
+
virtual void train (idx_t n, const float * x, faiss::Index & index,
|
88
|
+
const float *x_weights = nullptr);
|
89
|
+
|
90
|
+
|
91
|
+
/** run with encoded vectors
|
92
|
+
*
|
93
|
+
* in addition to train()'s parameters, takes a codec as parameter
|
94
|
+
* to decode the input vectors.
|
95
|
+
*
|
96
|
+
* @param codec codec used to decode the vectors (nullptr =
|
97
|
+
* vectors are in fact floats) *
|
98
|
+
*/
|
99
|
+
void train_encoded (idx_t nx, const uint8_t *x_in,
|
100
|
+
const Index * codec, Index & index,
|
101
|
+
const float *weights = nullptr);
|
74
102
|
|
75
103
|
/// Post-process the centroids after each centroid update.
|
76
104
|
/// includes optional L2 normalization and nearest integer rounding
|
@@ -0,0 +1,267 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#include <faiss/DirectMap.h>
|
11
|
+
|
12
|
+
#include <cstdio>
|
13
|
+
#include <cassert>
|
14
|
+
|
15
|
+
#include <faiss/impl/FaissAssert.h>
|
16
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
17
|
+
|
18
|
+
namespace faiss {
|
19
|
+
|
20
|
+
/// A freshly constructed direct map is disabled (NoMap); both
/// containers start out empty.
DirectMap::DirectMap ()
    : type (NoMap)
{
}
|
22
|
+
|
23
|
+
/** Switch the direct map to another type and rebuild it from the
 * inverted lists.
 *
 * Nothing happens when new_type is already the current type. Otherwise
 * both containers are emptied, the type is switched, and (unless the new
 * type is NoMap) every (list, offset) slot of invlists is recorded under
 * the id stored in that slot.
 *
 * @param new_type  NoMap, Array or Hashtable; any other value throws
 * @param invlists  inverted lists to scan; only dereferenced when the new
 *                  type is Array or Hashtable
 * @param ntotal    Array: size of the array (entries start at -1);
 *                  Hashtable: capacity reserved up front
 *
 * In Array mode an exception is thrown when a stored id is outside
 * [0, ntotal). Note that the type has already been switched and the
 * array partially filled at that point.
 */
void DirectMap::set_type (Type new_type, const InvertedLists *invlists, size_t ntotal) {

    FAISS_THROW_IF_NOT (new_type == NoMap || new_type == Array ||
                        new_type == Hashtable);

    if (new_type == type) {
        // nothing to do
        return;
    }

    array.clear ();
    hashtable.clear ();
    type = new_type;

    if (new_type == NoMap) {
        return;
    }

    if (new_type == Array) {
        array.resize (ntotal, -1);
    } else {
        hashtable.reserve (ntotal);
    }

    for (size_t key = 0; key < invlists->nlist; key++) {
        const size_t list_size = invlists->list_size (key);
        InvertedLists::ScopedIds idlist (invlists, key);

        // The offset counter has the same type as list_size: no
        // signed/unsigned comparison and no truncation where long is
        // narrower than size_t.
        for (size_t ofs = 0; ofs < list_size; ofs++) {
            if (new_type == Array) {
                FAISS_THROW_IF_NOT_MSG (
                       0 <= idlist [ofs] && idlist[ofs] < ntotal,
                       "direct map supported only for seuquential ids");
                array [idlist [ofs]] = lo_build(key, ofs);
            } else {
                hashtable [idlist [ofs]] = lo_build(key, ofs);
            }
        }
    }
}
|
63
|
+
|
64
|
+
void DirectMap::clear()
|
65
|
+
{
|
66
|
+
array.clear ();
|
67
|
+
hashtable.clear ();
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
/** Look up the LO-encoded (list number, offset) entry stored for an id.
 *
 * @param key  id to look up
 * @return     the stored entry
 *
 * Throws when the map type is neither Array nor Hashtable, when the key
 * is out of range (Array) or absent (Hashtable), or when the array slot
 * holds a negative value.
 */
DirectMap::idx_t DirectMap::get (idx_t key) const
{
    if (type == Hashtable) {
        auto res = hashtable.find (key);
        FAISS_THROW_IF_NOT_MSG (res != hashtable.end(), "key not found");
        return res->second;
    }

    if (type != Array) {
        FAISS_THROW_MSG ("direct map not initialized");
    }

    // Array mode from here on
    FAISS_THROW_IF_NOT_MSG (
        key >= 0 && key < array.size(), "invalid key"
    );
    idx_t lo = array[key];
    FAISS_THROW_IF_NOT_MSG(lo >= 0, "-1 entry in direct_map");
    return lo;
}
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
/** Record where a single vector was stored.
 *
 * @param id       id of the vector; in Array mode it is asserted to be
 *                 equal to the current array size
 * @param list_no  inverted list that received the vector; when negative,
 *                 Array mode stores -1 and Hashtable mode stores nothing
 * @param offset   position of the vector inside that list
 *
 * No-op when the map type is NoMap.
 */
void DirectMap::add_single_id (idx_t id, idx_t list_no, size_t offset)
{
    const bool has_list = list_no >= 0;

    if (type == Array) {
        assert (id == array.size());
        const idx_t entry = has_list
            ? static_cast<idx_t> (lo_build (list_no, offset))
            : static_cast<idx_t> (-1);
        array.push_back (entry);
        return;
    }

    if (type == Hashtable && has_list) {
        hashtable[id] = lo_build (list_no, offset);
    }
}
|
109
|
+
|
110
|
+
void DirectMap::check_can_add (const idx_t *ids) {
|
111
|
+
if (type == Array && ids) {
|
112
|
+
FAISS_THROW_MSG ("cannot have array direct map and add with ids");
|
113
|
+
}
|
114
|
+
}
|
115
|
+
|
116
|
+
/********************* DirectMapAdd implementation */
|
117
|
+
|
118
|
+
|
119
|
+
/** Prepare the direct map for a batch of n additions.
 *
 * Array mode: xids must be null (throws otherwise); the array is grown
 * by n slots initialised to -1 and ntotal remembers its previous size.
 * Hashtable mode: a temporary buffer of n entries (initialised to -1) is
 * allocated; it is copied into the hashtable by the destructor.
 * NoMap: nothing is allocated.
 *
 * @param direct_map  map to update (kept by reference)
 * @param n           number of vectors in the batch
 * @param xids        ids of the vectors, or nullptr
 */
DirectMapAdd::DirectMapAdd (DirectMap &direct_map, size_t n, const idx_t *xids):
    direct_map(direct_map), type(direct_map.type), ntotal(0), n(n), xids(xids)
{
    if (type == DirectMap::Array) {
        FAISS_THROW_IF_NOT (xids == nullptr);
        ntotal = direct_map.array.size();
        direct_map.array.resize (ntotal + n, -1);
    } else if (type == DirectMap::Hashtable) {
        // The destructor reads ntotal when xids is null, so it must not
        // be left uninitialised in this mode.
        // NOTE(review): assumes the hashtable holds one entry per indexed
        // vector, so its size is the id of the next sequential vector --
        // confirm against the caller.
        ntotal = direct_map.hashtable.size();
        // can't parallel update hashtable so use temp array
        all_ofs.resize (n, -1);
    }
}
|
131
|
+
|
132
|
+
|
133
|
+
void DirectMapAdd::add (size_t i, idx_t list_no, size_t ofs)
|
134
|
+
{
|
135
|
+
if (type == DirectMap::Array) {
|
136
|
+
direct_map.array [ntotal + i] = lo_build (list_no, ofs);
|
137
|
+
} else if (type == DirectMap::Hashtable) {
|
138
|
+
all_ofs [i] = lo_build (list_no, ofs);
|
139
|
+
}
|
140
|
+
}
|
141
|
+
|
142
|
+
/** Flush the buffered entries into the hashtable (Hashtable mode only).
 *
 * Entry i is stored under xids[i] when xids was given, otherwise under
 * ntotal + i. Other map types need no work here.
 */
DirectMapAdd::~DirectMapAdd ()
{
    if (type == DirectMap::Hashtable) {
        // size_t counter: n is a size_t, an int counter would overflow
        // for batches larger than INT_MAX
        for (size_t i = 0; i < n; i++) {
            idx_t id = xids ? xids[i] : ntotal + i;
            direct_map.hashtable [id] = all_ofs [i];
        }
    }
}
|
151
|
+
|
152
|
+
/********************************************************/
|
153
|
+
|
154
|
+
// Short names for the scoped accessors of InvertedLists used below.
using ScopedIds = InvertedLists::ScopedIds;
using ScopedCodes = InvertedLists::ScopedCodes;
|
156
|
+
|
157
|
+
|
158
|
+
size_t DirectMap::remove_ids(const IDSelector& sel, InvertedLists *invlists)
|
159
|
+
{
|
160
|
+
size_t nlist = invlists->nlist;
|
161
|
+
std::vector<idx_t> toremove(nlist);
|
162
|
+
|
163
|
+
size_t nremove = 0;
|
164
|
+
|
165
|
+
if (type == NoMap) {
|
166
|
+
// exhaustive scan of IVF
|
167
|
+
#pragma omp parallel for
|
168
|
+
for (idx_t i = 0; i < nlist; i++) {
|
169
|
+
idx_t l0 = invlists->list_size (i), l = l0, j = 0;
|
170
|
+
ScopedIds idsi (invlists, i);
|
171
|
+
while (j < l) {
|
172
|
+
if (sel.is_member (idsi[j])) {
|
173
|
+
l--;
|
174
|
+
invlists->update_entry (
|
175
|
+
i, j,
|
176
|
+
invlists->get_single_id (i, l),
|
177
|
+
ScopedCodes (invlists, i, l).get()
|
178
|
+
);
|
179
|
+
} else {
|
180
|
+
j++;
|
181
|
+
}
|
182
|
+
}
|
183
|
+
toremove[i] = l0 - l;
|
184
|
+
}
|
185
|
+
// this will not run well in parallel on ondisk because of
|
186
|
+
// possible shrinks
|
187
|
+
for (idx_t i = 0; i < nlist; i++) {
|
188
|
+
if (toremove[i] > 0) {
|
189
|
+
nremove += toremove[i];
|
190
|
+
invlists->resize(i, invlists->list_size(i) - toremove[i]);
|
191
|
+
}
|
192
|
+
}
|
193
|
+
} else if (type == Hashtable) {
|
194
|
+
const IDSelectorArray *sela =
|
195
|
+
dynamic_cast<const IDSelectorArray*>(&sel);
|
196
|
+
FAISS_THROW_IF_NOT_MSG (
|
197
|
+
sela,
|
198
|
+
"remove with hashtable works only with IDSelectorArray"
|
199
|
+
);
|
200
|
+
|
201
|
+
for (idx_t i = 0; i < sela->n; i++) {
|
202
|
+
idx_t id = sela->ids[i];
|
203
|
+
auto res = hashtable.find (id);
|
204
|
+
if (res != hashtable.end()) {
|
205
|
+
size_t list_no = lo_listno (res->second);
|
206
|
+
size_t offset = lo_offset (res->second);
|
207
|
+
idx_t last = invlists->list_size (list_no) - 1;
|
208
|
+
hashtable.erase (res);
|
209
|
+
if (offset < last) {
|
210
|
+
idx_t last_id = invlists->get_single_id (list_no, last);
|
211
|
+
invlists->update_entry (
|
212
|
+
list_no, offset,
|
213
|
+
last_id,
|
214
|
+
ScopedCodes (invlists, list_no, last).get()
|
215
|
+
);
|
216
|
+
// update hash entry for last element
|
217
|
+
hashtable [last_id] = list_no << 32 | offset;
|
218
|
+
}
|
219
|
+
invlists->resize(list_no, last);
|
220
|
+
nremove++;
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
} else {
|
225
|
+
FAISS_THROW_MSG("remove not supported with this direct_map format");
|
226
|
+
}
|
227
|
+
return nremove;
|
228
|
+
}
|
229
|
+
|
230
|
+
/** Replace the codes of already-indexed vectors and move them to new
 * inverted lists (Array mode only; throws for other map types).
 *
 * For each of the n ids, the old entry is removed from its list by moving
 * the last entry of that list into its slot, then the new code is
 * appended to list assign[i] and the array is updated.
 *
 * @param invlists  inverted lists to modify
 * @param n         number of vectors to update (nothing is done if <= 0)
 * @param ids       ids to update, size n; each must be in
 *                  [0, array.size()) and have a non-negative map entry
 * @param assign    destination list of each vector, size n
 * @param codes     new codes, n * invlists->code_size bytes
 */
void DirectMap::update_codes (InvertedLists *invlists,
                              int n, const idx_t *ids,
                              const idx_t *assign,
                              const uint8_t *codes)
{
    FAISS_THROW_IF_NOT (type == Array);

    size_t code_size = invlists->code_size;

    // int counter: comparing a size_t counter with a negative n would
    // wrap around and run far past the end of the input arrays
    for (int i = 0; i < n; i++) {
        idx_t id = ids[i];
        FAISS_THROW_IF_NOT_MSG (0 <= id && id < array.size(),
                                "id to update out of range");
        { // remove old one
            idx_t dm = array [id];
            // a -1 slot would otherwise be decoded as a bogus list number
            FAISS_THROW_IF_NOT_MSG (dm >= 0, "-1 entry in direct_map");
            size_t ofs = lo_offset (dm);
            size_t il = lo_listno (dm);
            size_t l = invlists->list_size (il);
            if (ofs != l - 1) { // move l - 1 to ofs
                idx_t id2 = invlists->get_single_id (il, l - 1);
                array[id2] = lo_build (il, ofs);
                // scoped accessor, as in remove_ids, instead of a raw
                // get_single_code pointer
                invlists->update_entry (
                    il, ofs, id2,
                    ScopedCodes (invlists, il, l - 1).get());
            }
            invlists->resize (il, l - 1);
        }
        { // insert new one
            int64_t il = assign[i];
            size_t l = invlists->list_size (il);
            // the new entry lands at the current end of the list
            array [id] = lo_build (il, l);
            invlists->add_entry (
                il, id, codes + static_cast<size_t> (i) * code_size);
        }
    }
}
|
265
|
+
|
266
|
+
|
267
|
+
}
|
@@ -0,0 +1,120 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#ifndef FAISS_DIRECT_MAP_H
|
11
|
+
#define FAISS_DIRECT_MAP_H
|
12
|
+
|
13
|
+
#include <faiss/InvertedLists.h>
|
14
|
+
#include <unordered_map>
|
15
|
+
|
16
|
+
|
17
|
+
namespace faiss {
|
18
|
+
|
19
|
+
// When a list id and an offset are packed together into one uint64,
|
20
|
+
// we call this LO = list-offset
|
21
|
+
|
22
|
+
/// Pack a list id (upper 32 bits) and an offset (lower bits) into one
/// LO value. The offset is not masked, so it is expected to fit in 32 bits.
inline uint64_t lo_build (uint64_t list_id, uint64_t offset) {
    const uint64_t upper = list_id << 32;
    return upper | offset;
}
|
25
|
+
|
26
|
+
/// Extract the list number (upper 32 bits) from an LO value.
inline uint64_t lo_listno (uint64_t lo) {
    const unsigned shift = 32;
    return lo >> shift;
}
|
29
|
+
|
30
|
+
/// Extract the offset (lower 32 bits) from an LO value.
inline uint64_t lo_offset (uint64_t lo) {
    const uint64_t low_mask = (static_cast<uint64_t> (1) << 32) - 1;
    return lo & low_mask;
}
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Direct map: a way to map back from ids to inverted lists
|
36
|
+
*/
|
37
|
+
struct DirectMap {
|
38
|
+
typedef Index::idx_t idx_t;
|
39
|
+
|
40
|
+
enum Type {
|
41
|
+
NoMap = 0, // default
|
42
|
+
Array = 1, // sequential ids (only for add, no add_with_ids)
|
43
|
+
Hashtable = 2 // arbitrary ids
|
44
|
+
};
|
45
|
+
Type type;
|
46
|
+
|
47
|
+
/// map for direct access to the elements. Map ids to LO-encoded entries.
|
48
|
+
std::vector <idx_t> array;
|
49
|
+
std::unordered_map <idx_t, idx_t> hashtable;
|
50
|
+
|
51
|
+
DirectMap();
|
52
|
+
|
53
|
+
/// set type and initialize
|
54
|
+
void set_type (Type new_type, const InvertedLists *invlists, size_t ntotal);
|
55
|
+
|
56
|
+
/// get an entry
|
57
|
+
idx_t get (idx_t id) const;
|
58
|
+
|
59
|
+
/// for quick checks
|
60
|
+
bool no () const {return type == NoMap; }
|
61
|
+
|
62
|
+
/**
|
63
|
+
* update the direct_map
|
64
|
+
*/
|
65
|
+
|
66
|
+
/// throw if Array and ids is not NULL
|
67
|
+
void check_can_add (const idx_t *ids);
|
68
|
+
|
69
|
+
/// non thread-safe version
|
70
|
+
void add_single_id (idx_t id, idx_t list_no, size_t offset);
|
71
|
+
|
72
|
+
/// remove all entries
|
73
|
+
void clear();
|
74
|
+
|
75
|
+
/**
|
76
|
+
* operations on inverted lists that require translation with a DirectMap
|
77
|
+
*/
|
78
|
+
|
79
|
+
/// remove ids from the InvertedLists, possibly using the direct map
|
80
|
+
size_t remove_ids(const IDSelector& sel, InvertedLists *invlists);
|
81
|
+
|
82
|
+
/// update entries, using the direct map
|
83
|
+
void update_codes (InvertedLists *invlists,
|
84
|
+
int n, const idx_t *ids,
|
85
|
+
const idx_t *list_nos,
|
86
|
+
const uint8_t *codes);
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
};
|
91
|
+
|
92
|
+
/// Thread-safe way of updating the direct_map
|
93
|
+
struct DirectMapAdd {
|
94
|
+
|
95
|
+
typedef Index::idx_t idx_t;
|
96
|
+
|
97
|
+
using Type = DirectMap::Type;
|
98
|
+
|
99
|
+
DirectMap &direct_map;
|
100
|
+
DirectMap::Type type;
|
101
|
+
size_t ntotal;
|
102
|
+
size_t n;
|
103
|
+
const idx_t *xids;
|
104
|
+
|
105
|
+
std::vector<idx_t> all_ofs;
|
106
|
+
|
107
|
+
DirectMapAdd (DirectMap &direct_map, size_t n, const idx_t *xids);
|
108
|
+
|
109
|
+
/// add vector i (with id xids[i]) at list_no and offset
|
110
|
+
void add (size_t i, idx_t list_no, size_t offset);
|
111
|
+
|
112
|
+
~DirectMapAdd ();
|
113
|
+
};
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
}
|
118
|
+
|
119
|
+
|
120
|
+
#endif
|