faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -34,6 +34,10 @@
|
|
34
34
|
#include <faiss/IndexScalarQuantizer.h>
|
35
35
|
#include <faiss/IndexHNSW.h>
|
36
36
|
#include <faiss/IndexLattice.h>
|
37
|
+
#include <faiss/IndexPQFastScan.h>
|
38
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
39
|
+
#include <faiss/IndexRefine.h>
|
40
|
+
|
37
41
|
|
38
42
|
#include <faiss/IndexBinaryFlat.h>
|
39
43
|
#include <faiss/IndexBinaryHNSW.h>
|
@@ -62,29 +66,90 @@ struct VTChain {
|
|
62
66
|
/// what kind of training does this coarse quantizer require?
|
63
67
|
char get_trains_alone(const Index *coarse_quantizer) {
|
64
68
|
return
|
69
|
+
dynamic_cast<const IndexFlat*>(coarse_quantizer) ? 0 :
|
70
|
+
// multi index just needs to be quantized
|
65
71
|
dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ? 1 :
|
66
72
|
dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
|
67
|
-
|
73
|
+
2; // for complicated indexes, we assume they can't be used as a kmeans index
|
68
74
|
}
|
69
75
|
|
76
|
+
/// Tell whether `s` ends with `suffix`.
///
/// An empty suffix matches every string; a suffix longer than `s` never
/// matches.
bool str_ends_with(const std::string& s, const std::string& suffix)
{
    // A suffix longer than the string cannot match; checking this first also
    // keeps the subtraction below from wrapping around.
    if (suffix.size() > s.size()) {
        return false;
    }
    // Compare the tail in place: no int narrowing of the size difference and
    // no backwards search through the whole string when the tail differs.
    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}
|
80
|
+
|
81
|
+
/// Check whether `s` ends with `suffix` followed by zero or more decimal
/// digits, e.g. "OPQ16,IVF4096" with suffix "IVF".
bool str_ends_with_digits(const std::string& s, const std::string& suffix)
{
    // Strip the trailing run of digits. The cast to unsigned char keeps
    // isdigit well-defined for bytes that are negative as plain char.
    size_t end = s.size();
    while (end > 0 && isdigit(static_cast<unsigned char>(s[end - 1]))) {
        end--;
    }
    // What remains, s[0, end), has to end with the suffix.
    return end >= suffix.size() &&
        s.compare(end - suffix.size(), suffix.size(), suffix) == 0;
}
|
90
|
+
|
91
|
+
/// Locate the first top-level pair of matching parentheses in `s`.
///
/// @param s   factory string to scan
/// @param i0  set to the index of the first '(' seen at nesting depth 0
/// @param i1  set to the index of the ')' that closes it
///
/// Throws (through FAISS_THROW_FMT) when a ')' shows up before any '(' or
/// when the end of the string is reached without returning to depth 0 --
/// which includes a string containing no parentheses at all.
void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
    int depth = 0;
    for (int pos = 0; pos < s.length(); pos++) {
        const char ch = s[pos];
        if (ch == '(') {
            if (depth == 0) {
                // outermost opening parenthesis
                i0 = pos;
            }
            depth++;
        } else if (ch == ')') {
            depth--;
            if (depth < 0) {
                // closing parenthesis without a matching opening one
                FAISS_THROW_FMT("factory string %s: unbalanced parentheses", s.c_str());
            }
            if (depth == 0) {
                i1 = pos;
                return;
            }
        }
    }
    FAISS_THROW_FMT("factory string %s: unbalanced parentheses st=%d", s.c_str(), depth);
}
|
72
116
|
|
117
|
+
} // anonymous namespace
|
118
|
+
|
73
119
|
Index *index_factory (int d, const char *description_in, MetricType metric)
|
74
120
|
{
|
75
121
|
FAISS_THROW_IF_NOT(metric == METRIC_L2 ||
|
76
122
|
metric == METRIC_INNER_PRODUCT);
|
77
123
|
VTChain vts;
|
78
124
|
Index *coarse_quantizer = nullptr;
|
125
|
+
std::string parenthesis_ivf, parenthesis_refine;
|
79
126
|
Index *index = nullptr;
|
80
127
|
bool add_idmap = false;
|
81
|
-
|
128
|
+
int d_in = d;
|
82
129
|
|
83
130
|
ScopeDeleter1<Index> del_coarse_quantizer, del_index;
|
84
131
|
|
85
132
|
std::string description(description_in);
|
86
133
|
char *ptr;
|
87
134
|
|
135
|
+
// handle indexes in parentheses
|
136
|
+
while (description.find('(') != std::string::npos) {
|
137
|
+
// then we make a sub-index and remove the () from the description
|
138
|
+
int i0, i1;
|
139
|
+
find_matching_parentheses(description, i0, i1);
|
140
|
+
|
141
|
+
std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1);
|
142
|
+
|
143
|
+
if (str_ends_with_digits(description.substr(0, i0), "IVF")) {
|
144
|
+
parenthesis_ivf = sub_description;
|
145
|
+
} else if (str_ends_with(description.substr(0, i0), "Refine")) {
|
146
|
+
parenthesis_refine = sub_description;
|
147
|
+
} else {
|
148
|
+
FAISS_THROW_MSG("don't know what to do with parenthesis index");
|
149
|
+
}
|
150
|
+
description = description.erase(i0, i1 - i0 + 1);
|
151
|
+
}
|
152
|
+
|
88
153
|
int64_t ncentroids = -1;
|
89
154
|
bool use_2layer = false;
|
90
155
|
int hnsw_M = -1;
|
@@ -95,6 +160,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
95
160
|
int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
|
96
161
|
std::string stok(tok);
|
97
162
|
nbit = 8;
|
163
|
+
int bbs = -1;
|
164
|
+
char c;
|
98
165
|
|
99
166
|
// to avoid mem leaks with exceptions:
|
100
167
|
// do all tests before any instantiation
|
@@ -140,15 +207,20 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
140
207
|
// coarse quantizers
|
141
208
|
} else if (!coarse_quantizer &&
|
142
209
|
sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
|
143
|
-
coarse_quantizer_1 = new IndexHNSWFlat (d, M);
|
210
|
+
coarse_quantizer_1 = new IndexHNSWFlat (d, M, metric);
|
144
211
|
|
145
212
|
} else if (!coarse_quantizer &&
|
146
213
|
sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) {
|
147
|
-
if (
|
214
|
+
if (!parenthesis_ivf.empty()) {
|
215
|
+
coarse_quantizer_1 =
|
216
|
+
index_factory(d, parenthesis_ivf.c_str(), metric);
|
217
|
+
|
218
|
+
} else if (metric == METRIC_L2) {
|
148
219
|
coarse_quantizer_1 = new IndexFlatL2 (d);
|
149
220
|
} else {
|
150
221
|
coarse_quantizer_1 = new IndexFlatIP (d);
|
151
222
|
}
|
223
|
+
|
152
224
|
} else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) {
|
153
225
|
FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
|
154
226
|
"MultiIndex not implemented for inner prod search");
|
@@ -228,6 +300,32 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
228
300
|
del_coarse_quantizer.release ();
|
229
301
|
index_ivf->own_fields = true;
|
230
302
|
index_1 = index_ivf;
|
303
|
+
} else if (!index && (
|
304
|
+
sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
|
305
|
+
(sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
|
306
|
+
(sscanf (tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
|
307
|
+
if (bbs == -1) {
|
308
|
+
bbs = 32;
|
309
|
+
}
|
310
|
+
bool by_residual = str_ends_with(stok, "fsr");
|
311
|
+
if (coarse_quantizer) {
|
312
|
+
IndexIVFPQFastScan *index_ivf = new IndexIVFPQFastScan(
|
313
|
+
coarse_quantizer, d, ncentroids, M, 4, metric, bbs
|
314
|
+
);
|
315
|
+
index_ivf->quantizer_trains_alone =
|
316
|
+
get_trains_alone (coarse_quantizer);
|
317
|
+
index_ivf->metric_type = metric;
|
318
|
+
index_ivf->by_residual = by_residual;
|
319
|
+
index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
|
320
|
+
del_coarse_quantizer.release ();
|
321
|
+
index_ivf->own_fields = true;
|
322
|
+
index_1 = index_ivf;
|
323
|
+
} else {
|
324
|
+
IndexPQFastScan *index_pq = new IndexPQFastScan (
|
325
|
+
d, M, 4, metric, bbs
|
326
|
+
);
|
327
|
+
index_1 = index_pq;
|
328
|
+
}
|
231
329
|
} else if (!index && (sscanf (tok, "PQ%dx%d", &M, &nbit) == 2 ||
|
232
330
|
sscanf (tok, "PQ%d", &M) == 1 ||
|
233
331
|
sscanf (tok, "PQ%dnp", &M) == 1)) {
|
@@ -299,7 +397,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
299
397
|
FAISS_THROW_IF_NOT(!coarse_quantizer);
|
300
398
|
index_1 = new IndexLattice(d, M, nbit, r2);
|
301
399
|
} else if (stok == "RFlat") {
|
302
|
-
|
400
|
+
parenthesis_refine = "Flat";
|
401
|
+
} else if (stok == "Refine") {
|
402
|
+
FAISS_THROW_IF_NOT_MSG(
|
403
|
+
!parenthesis_refine.empty(),
|
404
|
+
"Refine index should be provided in parentheses"
|
405
|
+
);
|
303
406
|
} else {
|
304
407
|
FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n",
|
305
408
|
tok, description_in);
|
@@ -356,8 +459,10 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
356
459
|
index = index_pt;
|
357
460
|
}
|
358
461
|
|
359
|
-
if (
|
360
|
-
|
462
|
+
if (!parenthesis_refine.empty()) {
|
463
|
+
Index *refine_index = index_factory(d_in, parenthesis_refine.c_str(), metric);
|
464
|
+
IndexRefine *index_rf = new IndexRefine(index, refine_index);
|
465
|
+
index_rf->own_refine_index = true;
|
361
466
|
index_rf->own_fields = true;
|
362
467
|
index = index_rf;
|
363
468
|
}
|
@@ -51,7 +51,7 @@ const int IO_FLAG_READ_ONLY = 2;
|
|
51
51
|
const int IO_FLAG_ONDISK_SAME_DIR = 4;
|
52
52
|
// don't load IVF data to RAM, only list sizes
|
53
53
|
const int IO_FLAG_SKIP_IVF_DATA = 8;
|
54
|
-
// try to memmap data (useful
|
54
|
+
// try to memmap data (useful to load an ArrayInvertedLists as an OnDiskInvertedLists)
|
55
55
|
const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
|
56
56
|
|
57
57
|
|
@@ -76,53 +76,6 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
|
|
76
76
|
InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
|
77
77
|
|
78
78
|
|
79
|
-
#ifndef _MSC_VER
|
80
|
-
/** Callbacks to handle other types of InvertedList objects.
|
81
|
-
*
|
82
|
-
* The callbacks should be registered with add_callback before calling
|
83
|
-
* read_index or read_InvertedLists. The callbacks for
|
84
|
-
* OnDiskInvertedLists are registered by default. The invlist type is
|
85
|
-
* identified by:
|
86
|
-
*
|
87
|
-
* - the key (a fourcc) at read time
|
88
|
-
* - the class name (as given by typeid.name) at write time
|
89
|
-
*/
|
90
|
-
struct InvertedListsIOHook {
|
91
|
-
const std::string key; ///< string version of the fourcc
|
92
|
-
const std::string classname; ///< typeid.name
|
93
|
-
|
94
|
-
InvertedListsIOHook(const std::string & key, const std::string & classname);
|
95
|
-
|
96
|
-
/// write the index to the IOWriter (including the fourcc)
|
97
|
-
virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
|
98
|
-
|
99
|
-
/// called when the fourcc matches this class's fourcc
|
100
|
-
virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
|
101
|
-
|
102
|
-
/** read from a ArrayInvertedLists into this invertedlist type.
|
103
|
-
* For this to work, the callback has to be enabled and the io_flag has to be set to
|
104
|
-
* IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
|
105
|
-
*/
|
106
|
-
virtual InvertedLists * read_ArrayInvertedLists(
|
107
|
-
IOReader *f, int io_flags,
|
108
|
-
size_t nlist, size_t code_size,
|
109
|
-
const std::vector<size_t> &sizes) const = 0;
|
110
|
-
|
111
|
-
virtual ~InvertedListsIOHook() {}
|
112
|
-
|
113
|
-
/**************************** Manage the set of callbacks ******/
|
114
|
-
|
115
|
-
// transfers ownership
|
116
|
-
static void add_callback(InvertedListsIOHook *);
|
117
|
-
static void print_callbacks();
|
118
|
-
static InvertedListsIOHook* lookup(int h);
|
119
|
-
static InvertedListsIOHook* lookup_classname(const std::string & classname);
|
120
|
-
|
121
|
-
};
|
122
|
-
|
123
|
-
#endif // !_MSC_VER
|
124
|
-
|
125
|
-
|
126
79
|
} // namespace faiss
|
127
80
|
|
128
81
|
|
@@ -0,0 +1,151 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <faiss/invlists/BlockInvertedLists.h>
|
9
|
+
|
10
|
+
#include <faiss/impl/FaissAssert.h>
|
11
|
+
|
12
|
+
#include <faiss/impl/io.h>
|
13
|
+
#include <faiss/impl/io_macros.h>
|
14
|
+
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
/// Build a container holding `nlist` empty inverted lists.
///
/// @param nlist        number of inverted lists
/// @param n_per_block  nb of vectors stored per block
/// @param block_size   nb bytes per block
///
/// The base class code_size is set to InvertedLists::INVALID_CODE_SIZE.
BlockInvertedLists::BlockInvertedLists (
        size_t nlist, size_t n_per_block,
        size_t block_size):
    InvertedLists (nlist, InvertedLists::INVALID_CODE_SIZE),
    n_per_block(n_per_block), block_size(block_size),
    // one (empty) code table and one (empty) id vector per list
    codes(nlist), ids(nlist)
{}
|
27
|
+
|
28
|
+
/// Build an object with no lists and zeroed block parameters.
/// BlockInvertedListsIOHook::read creates one of these and then fills in
/// every field from the stream.
BlockInvertedLists::BlockInvertedLists ():
    BlockInvertedLists (0, 0, 0)
{}
|
32
|
+
|
33
|
+
|
34
|
+
size_t BlockInvertedLists::add_entries (
|
35
|
+
size_t list_no, size_t n_entry,
|
36
|
+
const idx_t* ids_in, const uint8_t *code)
|
37
|
+
{
|
38
|
+
if (n_entry == 0) return 0;
|
39
|
+
FAISS_THROW_IF_NOT (list_no < nlist);
|
40
|
+
size_t o = ids [list_no].size();
|
41
|
+
FAISS_THROW_IF_NOT (o == 0); // not clear how we should handle subsequent adds
|
42
|
+
ids [list_no].resize (o + n_entry);
|
43
|
+
memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
|
44
|
+
|
45
|
+
// copy whole blocks
|
46
|
+
size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
|
47
|
+
codes [list_no].resize (n_block * block_size);
|
48
|
+
memcpy (&codes[list_no][o * code_size], code, n_block * block_size);
|
49
|
+
return o;
|
50
|
+
}
|
51
|
+
|
52
|
+
/// Number of ids stored in list `list_no` (asserted to be < nlist).
size_t BlockInvertedLists::list_size(size_t list_no) const
{
    assert (list_no < nlist);
    const std::vector<idx_t>& list_ids = ids[list_no];
    return list_ids.size();
}
|
57
|
+
|
58
|
+
/// Pointer to the block-packed codes of list `list_no` (asserted to be
/// < nlist). The storage stays owned by this object.
const uint8_t * BlockInvertedLists::get_codes (size_t list_no) const
{
    assert (list_no < nlist);
    const AlignedTable<uint8_t>& list_codes = codes[list_no];
    return list_codes.get();
}
|
63
|
+
|
64
|
+
/// Pointer to the ids of list `list_no` (asserted to be < nlist).
/// The storage stays owned by this object.
const InvertedLists::idx_t * BlockInvertedLists::get_ids (size_t list_no) const
{
    assert (list_no < nlist);
    const std::vector<idx_t>& list_ids = ids[list_no];
    return list_ids.data();
}
|
69
|
+
|
70
|
+
void BlockInvertedLists::resize (size_t list_no, size_t new_size)
|
71
|
+
{
|
72
|
+
ids[list_no].resize (new_size);
|
73
|
+
size_t prev_nbytes = codes[list_no].size();
|
74
|
+
size_t n_block = (new_size + n_per_block - 1) / n_per_block;
|
75
|
+
size_t new_nbytes = n_block * block_size;
|
76
|
+
codes[list_no].resize (new_nbytes);
|
77
|
+
if (prev_nbytes < new_nbytes) {
|
78
|
+
// set new elements to 0
|
79
|
+
memset(
|
80
|
+
codes[list_no].data() + prev_nbytes, 0,
|
81
|
+
new_nbytes - prev_nbytes
|
82
|
+
);
|
83
|
+
}
|
84
|
+
}
|
85
|
+
|
86
|
+
void BlockInvertedLists::update_entries (
|
87
|
+
size_t , size_t , size_t ,
|
88
|
+
const idx_t *, const uint8_t *)
|
89
|
+
{
|
90
|
+
FAISS_THROW_MSG("not impemented");
|
91
|
+
/*
|
92
|
+
assert (list_no < nlist);
|
93
|
+
assert (n_entry + offset <= ids[list_no].size());
|
94
|
+
memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
|
95
|
+
memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
|
96
|
+
*/
|
97
|
+
}
|
98
|
+
|
99
|
+
|
100
|
+
/// Nothing to release by hand: the id and code containers clean up after
/// themselves.
BlockInvertedLists::~BlockInvertedLists () = default;
|
102
|
+
|
103
|
+
/**************************************************
|
104
|
+
* IO hook implementation
|
105
|
+
**************************************************/
|
106
|
+
|
107
|
+
/// Set up the hook with the key "ilbl" (the same fourcc that write() emits)
/// and the typeid name of BlockInvertedLists as its class name.
BlockInvertedListsIOHook::BlockInvertedListsIOHook():
    InvertedListsIOHook(
        "ilbl",
        typeid(BlockInvertedLists).name()
    )
{}
|
110
|
+
|
111
|
+
|
112
|
+
void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) const
|
113
|
+
{
|
114
|
+
uint32_t h = fourcc ("ilbl");
|
115
|
+
WRITE1 (h);
|
116
|
+
const BlockInvertedLists *il =
|
117
|
+
dynamic_cast<const BlockInvertedLists*> (ils_in);
|
118
|
+
WRITE1 (il->nlist);
|
119
|
+
WRITE1 (il->code_size);
|
120
|
+
WRITE1 (il->n_per_block);
|
121
|
+
WRITE1 (il->block_size);
|
122
|
+
|
123
|
+
for (size_t i = 0; i < il->nlist; i++) {
|
124
|
+
WRITEVECTOR(il->ids[i]);
|
125
|
+
WRITEVECTOR(il->codes[i]);
|
126
|
+
}
|
127
|
+
}
|
128
|
+
|
129
|
+
/// Deserialize a BlockInvertedLists from `f`.
///
/// Reads the fields in the order write() emits them after the fourcc:
/// nlist, code_size, n_per_block, block_size, then ids and codes of each
/// list. The fourcc itself is not read here.
///
/// @param f  source reader
/// @return   newly allocated object; the caller takes ownership
InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */) const
{
    BlockInvertedLists *il = new BlockInvertedLists();
    try {
        READ1 (il->nlist);
        READ1 (il->code_size);
        READ1 (il->n_per_block);
        READ1 (il->block_size);

        il->ids.resize(il->nlist);
        il->codes.resize(il->nlist);

        for (size_t i = 0; i < il->nlist; i++) {
            READVECTOR(il->ids[i]);
            READVECTOR(il->codes[i]);
        }
    } catch (...) {
        // NOTE(review): the READ* macros presumably throw on truncated or
        // corrupt input (confirm in io_macros.h); do not leak the partially
        // filled object in that case.
        delete il;
        throw;
    }

    return il;
}
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
} // namespace faiss
|
@@ -0,0 +1,76 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include <faiss/invlists/InvertedLists.h>
|
12
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
13
|
+
#include <faiss/utils/AlignedTable.h>
|
14
|
+
#include <faiss/index_io.h>
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
/** Inverted Lists that are organized by blocks.
|
19
|
+
*
|
20
|
+
* Different from the regular inverted lists, the codes are organized by blocks
|
21
|
+
* of size block_size bytes that reprsent a set of n_per_block. Therefore, code
|
22
|
+
* allocations are always rounded up to block_size bytes. The codes are also
|
23
|
+
* aligned on 32-byte boundaries for use with SIMD.
|
24
|
+
*
|
25
|
+
* To avoid misinterpretations, the code_size is set to (size_t)(-1), even if
|
26
|
+
* arguably the amount of memory consumed by code is block_size / n_per_block.
|
27
|
+
*
|
28
|
+
* The writing functions add_entries and update_entries operate on block-aligned
|
29
|
+
* data.
|
30
|
+
*/
|
31
|
+
struct BlockInvertedLists: InvertedLists {
|
32
|
+
|
33
|
+
size_t n_per_block; // nb of vectors stored per block
|
34
|
+
size_t block_size; // nb bytes per block
|
35
|
+
|
36
|
+
std::vector<AlignedTable<uint8_t>> codes;
|
37
|
+
std::vector<std::vector<idx_t>> ids;
|
38
|
+
|
39
|
+
|
40
|
+
BlockInvertedLists (
|
41
|
+
size_t nlist, size_t vec_per_block,
|
42
|
+
size_t block_size
|
43
|
+
);
|
44
|
+
|
45
|
+
BlockInvertedLists();
|
46
|
+
|
47
|
+
size_t list_size(size_t list_no) const override;
|
48
|
+
const uint8_t * get_codes (size_t list_no) const override;
|
49
|
+
const idx_t * get_ids (size_t list_no) const override;
|
50
|
+
|
51
|
+
// works only on empty BlockInvertedLists
|
52
|
+
// the codes should be of size ceil(n_entry / n_per_block) * block_size
|
53
|
+
// and padded with 0s
|
54
|
+
size_t add_entries (
|
55
|
+
size_t list_no, size_t n_entry,
|
56
|
+
const idx_t* ids, const uint8_t *code) override;
|
57
|
+
|
58
|
+
/// not implemented
|
59
|
+
void update_entries (size_t list_no, size_t offset, size_t n_entry,
|
60
|
+
const idx_t *ids, const uint8_t *code) override;
|
61
|
+
|
62
|
+
// also pads new data with 0s
|
63
|
+
void resize (size_t list_no, size_t new_size) override;
|
64
|
+
|
65
|
+
~BlockInvertedLists () override;
|
66
|
+
|
67
|
+
};
|
68
|
+
|
69
|
+
struct BlockInvertedListsIOHook : InvertedListsIOHook {
|
70
|
+
BlockInvertedListsIOHook();
|
71
|
+
void write(const InvertedLists *ils, IOWriter *f) const override;
|
72
|
+
InvertedLists * read(IOReader *f, int io_flags) const override;
|
73
|
+
};
|
74
|
+
|
75
|
+
|
76
|
+
} // namespace faiss
|