faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -34,6 +34,10 @@
|
|
34
34
|
#include <faiss/IndexScalarQuantizer.h>
|
35
35
|
#include <faiss/IndexHNSW.h>
|
36
36
|
#include <faiss/IndexLattice.h>
|
37
|
+
#include <faiss/IndexPQFastScan.h>
|
38
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
39
|
+
#include <faiss/IndexRefine.h>
|
40
|
+
|
37
41
|
|
38
42
|
#include <faiss/IndexBinaryFlat.h>
|
39
43
|
#include <faiss/IndexBinaryHNSW.h>
|
@@ -62,29 +66,90 @@ struct VTChain {
|
|
62
66
|
/// what kind of training does this coarse quantizer require?
|
63
67
|
char get_trains_alone(const Index *coarse_quantizer) {
|
64
68
|
return
|
69
|
+
dynamic_cast<const IndexFlat*>(coarse_quantizer) ? 0 :
|
70
|
+
// multi index just needs to be quantized
|
65
71
|
dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ? 1 :
|
66
72
|
dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
|
67
|
-
|
73
|
+
2; // for complicated indexes, we assume they can't be used as a kmeans index
|
68
74
|
}
|
69
75
|
|
76
|
+
/// Returns true when `s` ends with `suffix`.
///
/// An empty suffix matches every string, including the empty one. A suffix
/// longer than `s` never matches.
bool str_ends_with(const std::string& s, const std::string& suffix)
{
    // Guard first so the subtraction below cannot wrap around.
    if (suffix.size() > s.size()) {
        return false;
    }
    // Compare only the tail instead of searching the whole string.
    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}
|
80
|
+
|
81
|
+
// check if ends with suffix followed by digits
|
82
|
+
bool str_ends_with_digits(const std::string& s, const std::string& suffix)
|
83
|
+
{
|
84
|
+
int i;
|
85
|
+
for(i = s.length() - 1; i >= 0; i--) {
|
86
|
+
if (!isdigit(s[i])) break;
|
87
|
+
}
|
88
|
+
return str_ends_with(s.substr(0, i + 1), suffix);
|
89
|
+
}
|
90
|
+
|
91
|
+
/// Locate the first top-level pair of parentheses in `s`.
///
/// On return i0 is the index of the first '(' seen at nesting depth 0 and
/// i1 the index of the ')' that brings the depth back to 0. Throws when a
/// ')' appears with no open '(' or when the string ends before the depth
/// returns to 0 (which includes a string without any parenthesis).
void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
    int depth = 0;
    const int len = s.length();
    for (int pos = 0; pos < len; pos++) {
        const char ch = s[pos];
        if (ch == '(') {
            if (depth == 0) {
                i0 = pos;
            }
            depth++;
        } else if (ch == ')') {
            depth--;
            if (depth == 0) {
                i1 = pos;
                return;
            }
            if (depth < 0) {
                FAISS_THROW_FMT("factory string %s: unbalanced parentheses", s.c_str());
            }
        }
    }
    FAISS_THROW_FMT("factory string %s: unbalanced parentheses st=%d", s.c_str(), depth);
}
|
72
116
|
|
117
|
+
} // anonymous namespace
|
118
|
+
|
73
119
|
Index *index_factory (int d, const char *description_in, MetricType metric)
|
74
120
|
{
|
75
121
|
FAISS_THROW_IF_NOT(metric == METRIC_L2 ||
|
76
122
|
metric == METRIC_INNER_PRODUCT);
|
77
123
|
VTChain vts;
|
78
124
|
Index *coarse_quantizer = nullptr;
|
125
|
+
std::string parenthesis_ivf, parenthesis_refine;
|
79
126
|
Index *index = nullptr;
|
80
127
|
bool add_idmap = false;
|
81
|
-
|
128
|
+
int d_in = d;
|
82
129
|
|
83
130
|
ScopeDeleter1<Index> del_coarse_quantizer, del_index;
|
84
131
|
|
85
132
|
std::string description(description_in);
|
86
133
|
char *ptr;
|
87
134
|
|
135
|
+
// handle indexes in parentheses
|
136
|
+
while (description.find('(') != std::string::npos) {
|
137
|
+
// then we make a sub-index and remove the () from the description
|
138
|
+
int i0, i1;
|
139
|
+
find_matching_parentheses(description, i0, i1);
|
140
|
+
|
141
|
+
std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1);
|
142
|
+
|
143
|
+
if (str_ends_with_digits(description.substr(0, i0), "IVF")) {
|
144
|
+
parenthesis_ivf = sub_description;
|
145
|
+
} else if (str_ends_with(description.substr(0, i0), "Refine")) {
|
146
|
+
parenthesis_refine = sub_description;
|
147
|
+
} else {
|
148
|
+
FAISS_THROW_MSG("don't know what to do with parenthesis index");
|
149
|
+
}
|
150
|
+
description = description.erase(i0, i1 - i0 + 1);
|
151
|
+
}
|
152
|
+
|
88
153
|
int64_t ncentroids = -1;
|
89
154
|
bool use_2layer = false;
|
90
155
|
int hnsw_M = -1;
|
@@ -95,6 +160,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
95
160
|
int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
|
96
161
|
std::string stok(tok);
|
97
162
|
nbit = 8;
|
163
|
+
int bbs = -1;
|
164
|
+
char c;
|
98
165
|
|
99
166
|
// to avoid mem leaks with exceptions:
|
100
167
|
// do all tests before any instantiation
|
@@ -140,15 +207,20 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
140
207
|
// coarse quantizers
|
141
208
|
} else if (!coarse_quantizer &&
|
142
209
|
sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
|
143
|
-
coarse_quantizer_1 = new IndexHNSWFlat (d, M);
|
210
|
+
coarse_quantizer_1 = new IndexHNSWFlat (d, M, metric);
|
144
211
|
|
145
212
|
} else if (!coarse_quantizer &&
|
146
213
|
sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) {
|
147
|
-
if (
|
214
|
+
if (!parenthesis_ivf.empty()) {
|
215
|
+
coarse_quantizer_1 =
|
216
|
+
index_factory(d, parenthesis_ivf.c_str(), metric);
|
217
|
+
|
218
|
+
} else if (metric == METRIC_L2) {
|
148
219
|
coarse_quantizer_1 = new IndexFlatL2 (d);
|
149
220
|
} else {
|
150
221
|
coarse_quantizer_1 = new IndexFlatIP (d);
|
151
222
|
}
|
223
|
+
|
152
224
|
} else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) {
|
153
225
|
FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
|
154
226
|
"MultiIndex not implemented for inner prod search");
|
@@ -228,6 +300,32 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
228
300
|
del_coarse_quantizer.release ();
|
229
301
|
index_ivf->own_fields = true;
|
230
302
|
index_1 = index_ivf;
|
303
|
+
} else if (!index && (
|
304
|
+
sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
|
305
|
+
(sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
|
306
|
+
(sscanf (tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
|
307
|
+
if (bbs == -1) {
|
308
|
+
bbs = 32;
|
309
|
+
}
|
310
|
+
bool by_residual = str_ends_with(stok, "fsr");
|
311
|
+
if (coarse_quantizer) {
|
312
|
+
IndexIVFPQFastScan *index_ivf = new IndexIVFPQFastScan(
|
313
|
+
coarse_quantizer, d, ncentroids, M, 4, metric, bbs
|
314
|
+
);
|
315
|
+
index_ivf->quantizer_trains_alone =
|
316
|
+
get_trains_alone (coarse_quantizer);
|
317
|
+
index_ivf->metric_type = metric;
|
318
|
+
index_ivf->by_residual = by_residual;
|
319
|
+
index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
|
320
|
+
del_coarse_quantizer.release ();
|
321
|
+
index_ivf->own_fields = true;
|
322
|
+
index_1 = index_ivf;
|
323
|
+
} else {
|
324
|
+
IndexPQFastScan *index_pq = new IndexPQFastScan (
|
325
|
+
d, M, 4, metric, bbs
|
326
|
+
);
|
327
|
+
index_1 = index_pq;
|
328
|
+
}
|
231
329
|
} else if (!index && (sscanf (tok, "PQ%dx%d", &M, &nbit) == 2 ||
|
232
330
|
sscanf (tok, "PQ%d", &M) == 1 ||
|
233
331
|
sscanf (tok, "PQ%dnp", &M) == 1)) {
|
@@ -299,7 +397,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
299
397
|
FAISS_THROW_IF_NOT(!coarse_quantizer);
|
300
398
|
index_1 = new IndexLattice(d, M, nbit, r2);
|
301
399
|
} else if (stok == "RFlat") {
|
302
|
-
|
400
|
+
parenthesis_refine = "Flat";
|
401
|
+
} else if (stok == "Refine") {
|
402
|
+
FAISS_THROW_IF_NOT_MSG(
|
403
|
+
!parenthesis_refine.empty(),
|
404
|
+
"Refine index should be provided in parentheses"
|
405
|
+
);
|
303
406
|
} else {
|
304
407
|
FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n",
|
305
408
|
tok, description_in);
|
@@ -356,8 +459,10 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
356
459
|
index = index_pt;
|
357
460
|
}
|
358
461
|
|
359
|
-
if (
|
360
|
-
|
462
|
+
if (!parenthesis_refine.empty()) {
|
463
|
+
Index *refine_index = index_factory(d_in, parenthesis_refine.c_str(), metric);
|
464
|
+
IndexRefine *index_rf = new IndexRefine(index, refine_index);
|
465
|
+
index_rf->own_refine_index = true;
|
361
466
|
index_rf->own_fields = true;
|
362
467
|
index = index_rf;
|
363
468
|
}
|
@@ -51,7 +51,7 @@ const int IO_FLAG_READ_ONLY = 2;
|
|
51
51
|
const int IO_FLAG_ONDISK_SAME_DIR = 4;
|
52
52
|
// don't load IVF data to RAM, only list sizes
|
53
53
|
const int IO_FLAG_SKIP_IVF_DATA = 8;
|
54
|
-
// try to memmap data (useful
|
54
|
+
// try to memmap data (useful to load an ArrayInvertedLists as an OnDiskInvertedLists)
|
55
55
|
const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
|
56
56
|
|
57
57
|
|
@@ -76,53 +76,6 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
|
|
76
76
|
InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
|
77
77
|
|
78
78
|
|
79
|
-
#ifndef _MSC_VER
|
80
|
-
/** Callbacks to handle other types of InvertedList objects.
|
81
|
-
*
|
82
|
-
* The callbacks should be registered with add_callback before calling
|
83
|
-
* read_index or read_InvertedLists. The callbacks for
|
84
|
-
* OnDiskInvertedLists are registrered by default. The invlist type is
|
85
|
-
* identified by:
|
86
|
-
*
|
87
|
-
* - the key (a fourcc) at read time
|
88
|
-
* - the class name (as given by typeid.name) at write time
|
89
|
-
*/
|
90
|
-
struct InvertedListsIOHook {
|
91
|
-
const std::string key; ///< string version of the fourcc
|
92
|
-
const std::string classname; ///< typeid.name
|
93
|
-
|
94
|
-
InvertedListsIOHook(const std::string & key, const std::string & classname);
|
95
|
-
|
96
|
-
/// write the index to the IOWriter (including the fourcc)
|
97
|
-
virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
|
98
|
-
|
99
|
-
/// called when the fourcc matches this class's fourcc
|
100
|
-
virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
|
101
|
-
|
102
|
-
/** read from a ArrayInvertedLists into this invertedlist type.
|
103
|
-
* For this to work, the callback has to be enabled and the io_flag has to be set to
|
104
|
-
* IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
|
105
|
-
*/
|
106
|
-
virtual InvertedLists * read_ArrayInvertedLists(
|
107
|
-
IOReader *f, int io_flags,
|
108
|
-
size_t nlist, size_t code_size,
|
109
|
-
const std::vector<size_t> &sizes) const = 0;
|
110
|
-
|
111
|
-
virtual ~InvertedListsIOHook() {}
|
112
|
-
|
113
|
-
/**************************** Manage the set of callbacks ******/
|
114
|
-
|
115
|
-
// transfers ownership
|
116
|
-
static void add_callback(InvertedListsIOHook *);
|
117
|
-
static void print_callbacks();
|
118
|
-
static InvertedListsIOHook* lookup(int h);
|
119
|
-
static InvertedListsIOHook* lookup_classname(const std::string & classname);
|
120
|
-
|
121
|
-
};
|
122
|
-
|
123
|
-
#endif // !_MSC_VER
|
124
|
-
|
125
|
-
|
126
79
|
} // namespace faiss
|
127
80
|
|
128
81
|
|
@@ -0,0 +1,151 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <faiss/invlists/BlockInvertedLists.h>
|
9
|
+
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
11
|
+
|
12
|
+
#include <faiss/impl/io.h>
|
13
|
+
#include <faiss/impl/io_macros.h>
|
14
|
+
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
/// Build `nlist` empty lists whose codes are stored in blocks of
/// `block_size` bytes, each block covering `n_per_block` vectors.
/// The base class code_size is set to INVALID_CODE_SIZE.
BlockInvertedLists::BlockInvertedLists (
        size_t nlist, size_t n_per_block,
        size_t block_size):
    InvertedLists (nlist, InvertedLists::INVALID_CODE_SIZE),
    n_per_block (n_per_block),
    block_size (block_size),
    codes (nlist),
    ids (nlist)
{}
|
27
|
+
|
28
|
+
/// Build an object with no lists and zeroed block parameters.
/// BlockInvertedListsIOHook::read starts from such an object and then
/// fills in every field.
BlockInvertedLists::BlockInvertedLists ()
    : InvertedLists (0, InvertedLists::INVALID_CODE_SIZE)
    , n_per_block (0)
    , block_size (0)
{
}
|
32
|
+
|
33
|
+
|
34
|
+
size_t BlockInvertedLists::add_entries (
|
35
|
+
size_t list_no, size_t n_entry,
|
36
|
+
const idx_t* ids_in, const uint8_t *code)
|
37
|
+
{
|
38
|
+
if (n_entry == 0) return 0;
|
39
|
+
FAISS_THROW_IF_NOT (list_no < nlist);
|
40
|
+
size_t o = ids [list_no].size();
|
41
|
+
FAISS_THROW_IF_NOT (o == 0); // not clear how we should handle subsequent adds
|
42
|
+
ids [list_no].resize (o + n_entry);
|
43
|
+
memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
|
44
|
+
|
45
|
+
// copy whole blocks
|
46
|
+
size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
|
47
|
+
codes [list_no].resize (n_block * block_size);
|
48
|
+
memcpy (&codes[list_no][o * code_size], code, n_block * block_size);
|
49
|
+
return o;
|
50
|
+
}
|
51
|
+
|
52
|
+
size_t BlockInvertedLists::list_size(size_t list_no) const
|
53
|
+
{
|
54
|
+
assert (list_no < nlist);
|
55
|
+
return ids[list_no].size();
|
56
|
+
}
|
57
|
+
|
58
|
+
const uint8_t * BlockInvertedLists::get_codes (size_t list_no) const
|
59
|
+
{
|
60
|
+
assert (list_no < nlist);
|
61
|
+
return codes[list_no].get();
|
62
|
+
}
|
63
|
+
|
64
|
+
/// Pointer to the ids of list `list_no` (asserted to be < nlist).
const InvertedLists::idx_t * BlockInvertedLists::get_ids (size_t list_no) const
{
    assert (list_no < nlist);
    const std::vector<idx_t> & list_ids = ids[list_no];
    return list_ids.data();
}
|
69
|
+
|
70
|
+
void BlockInvertedLists::resize (size_t list_no, size_t new_size)
|
71
|
+
{
|
72
|
+
ids[list_no].resize (new_size);
|
73
|
+
size_t prev_nbytes = codes[list_no].size();
|
74
|
+
size_t n_block = (new_size + n_per_block - 1) / n_per_block;
|
75
|
+
size_t new_nbytes = n_block * block_size;
|
76
|
+
codes[list_no].resize (new_nbytes);
|
77
|
+
if (prev_nbytes < new_nbytes) {
|
78
|
+
// set new elements to 0
|
79
|
+
memset(
|
80
|
+
codes[list_no].data() + prev_nbytes, 0,
|
81
|
+
new_nbytes - prev_nbytes
|
82
|
+
);
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
86
|
+
void BlockInvertedLists::update_entries (
|
87
|
+
size_t , size_t , size_t ,
|
88
|
+
const idx_t *, const uint8_t *)
|
89
|
+
{
|
90
|
+
FAISS_THROW_MSG("not impemented");
|
91
|
+
/*
|
92
|
+
assert (list_no < nlist);
|
93
|
+
assert (n_entry + offset <= ids[list_no].size());
|
94
|
+
memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
|
95
|
+
memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
|
96
|
+
*/
|
97
|
+
}
|
98
|
+
|
99
|
+
|
100
|
+
/// Nothing to release by hand: the id and code containers clean up after
/// themselves.
BlockInvertedLists::~BlockInvertedLists () = default;
|
102
|
+
|
103
|
+
/**************************************************
|
104
|
+
* IO hook implementation
|
105
|
+
**************************************************/
|
106
|
+
|
107
|
+
/// Register the hook under the "ilbl" key and the typeid name of
/// BlockInvertedLists.
BlockInvertedListsIOHook::BlockInvertedListsIOHook()
    : InvertedListsIOHook("ilbl", typeid(BlockInvertedLists).name())
{
}
|
110
|
+
|
111
|
+
|
112
|
+
void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) const
|
113
|
+
{
|
114
|
+
uint32_t h = fourcc ("ilbl");
|
115
|
+
WRITE1 (h);
|
116
|
+
const BlockInvertedLists *il =
|
117
|
+
dynamic_cast<const BlockInvertedLists*> (ils_in);
|
118
|
+
WRITE1 (il->nlist);
|
119
|
+
WRITE1 (il->code_size);
|
120
|
+
WRITE1 (il->n_per_block);
|
121
|
+
WRITE1 (il->block_size);
|
122
|
+
|
123
|
+
for (size_t i = 0; i < il->nlist; i++) {
|
124
|
+
WRITEVECTOR(il->ids[i]);
|
125
|
+
WRITEVECTOR(il->codes[i]);
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
/// Deserialize a BlockInvertedLists from `f`.
///
/// Reads nlist, code_size, n_per_block and block_size, then for every list
/// its ids vector followed by its codes vector (the mirror of write(),
/// minus the fourcc, which is not read here).
/// The caller owns the returned object; io_flags is ignored.
InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */) const
{
    BlockInvertedLists *il = new BlockInvertedLists();
    // NOTE(review): READ1 / READVECTOR come from io_macros.h and presumably
    // throw on a short or inconsistent read; free the partially filled
    // object in that case instead of leaking it.
    try {
        READ1 (il->nlist);
        READ1 (il->code_size);
        READ1 (il->n_per_block);
        READ1 (il->block_size);

        il->ids.resize(il->nlist);
        il->codes.resize(il->nlist);

        for (size_t i = 0; i < il->nlist; i++) {
            READVECTOR(il->ids[i]);
            READVECTOR(il->codes[i]);
        }
    } catch (...) {
        delete il;
        throw;
    }

    return il;
}
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
} // namespace faiss
|
@@ -0,0 +1,76 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include <faiss/invlists/InvertedLists.h>
|
12
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
13
|
+
#include <faiss/utils/AlignedTable.h>
|
14
|
+
#include <faiss/index_io.h>
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
/** Inverted Lists that are organized by blocks.
|
19
|
+
*
|
20
|
+
* Different from the regular inverted lists, the codes are organized by blocks
|
21
|
+
* of size block_size bytes that represent a set of n_per_block vectors. Therefore, code
|
22
|
+
* allocations are always rounded up to block_size bytes. The codes are also
|
23
|
+
* aligned on 32-byte boundaries for use with SIMD.
|
24
|
+
*
|
25
|
+
* To avoid misinterpretations, the code_size is set to (size_t)(-1), even if
|
26
|
+
* arguably the amount of memory consumed by code is block_size / n_per_block.
|
27
|
+
*
|
28
|
+
* The writing functions add_entries and update_entries operate on block-aligned
|
29
|
+
* data.
|
30
|
+
*/
|
31
|
+
struct BlockInvertedLists: InvertedLists {
|
32
|
+
|
33
|
+
size_t n_per_block; // nb of vectors stored per block
|
34
|
+
size_t block_size; // nb bytes per block
|
35
|
+
|
36
|
+
std::vector<AlignedTable<uint8_t>> codes;
|
37
|
+
std::vector<std::vector<idx_t>> ids;
|
38
|
+
|
39
|
+
|
40
|
+
BlockInvertedLists (
|
41
|
+
size_t nlist, size_t vec_per_block,
|
42
|
+
size_t block_size
|
43
|
+
);
|
44
|
+
|
45
|
+
BlockInvertedLists();
|
46
|
+
|
47
|
+
size_t list_size(size_t list_no) const override;
|
48
|
+
const uint8_t * get_codes (size_t list_no) const override;
|
49
|
+
const idx_t * get_ids (size_t list_no) const override;
|
50
|
+
|
51
|
+
// works only on empty BlockInvertedLists
|
52
|
+
// the codes should be of size ceil(n_entry / n_per_block) * block_size
|
53
|
+
// and padded with 0s
|
54
|
+
size_t add_entries (
|
55
|
+
size_t list_no, size_t n_entry,
|
56
|
+
const idx_t* ids, const uint8_t *code) override;
|
57
|
+
|
58
|
+
/// not implemented
|
59
|
+
void update_entries (size_t list_no, size_t offset, size_t n_entry,
|
60
|
+
const idx_t *ids, const uint8_t *code) override;
|
61
|
+
|
62
|
+
// also pads new data with 0s
|
63
|
+
void resize (size_t list_no, size_t new_size) override;
|
64
|
+
|
65
|
+
~BlockInvertedLists () override;
|
66
|
+
|
67
|
+
};
|
68
|
+
|
69
|
+
struct BlockInvertedListsIOHook : InvertedListsIOHook {
|
70
|
+
BlockInvertedListsIOHook();
|
71
|
+
void write(const InvertedLists *ils, IOWriter *f) const override;
|
72
|
+
InvertedLists * read(IOReader *f, int io_flags) const override;
|
73
|
+
};
|
74
|
+
|
75
|
+
|
76
|
+
} // namespace faiss
|