faiss 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -39,8 +39,12 @@ namespace faiss {
|
|
39
39
|
* that hides the template mess.
|
40
40
|
********************************************************************/
|
41
41
|
|
42
|
-
#
|
42
|
+
#ifdef __AVX2__
|
43
|
+
#ifdef __F16C__
|
43
44
|
#define USE_F16C
|
45
|
+
#else
|
46
|
+
#warning "Cannot enable AVX optimizations in scalar quantizer if -mf16c is not set as well"
|
47
|
+
#endif
|
44
48
|
#endif
|
45
49
|
|
46
50
|
|
@@ -1220,33 +1224,41 @@ SQDistanceComputer *select_distance_computer (
|
|
1220
1224
|
|
1221
1225
|
ScalarQuantizer::ScalarQuantizer
|
1222
1226
|
(size_t d, QuantizerType qtype):
|
1223
|
-
qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d
|
1227
|
+
qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d(d)
|
1228
|
+
{
|
1229
|
+
set_derived_sizes();
|
1230
|
+
}
|
1231
|
+
|
1232
|
+
ScalarQuantizer::ScalarQuantizer ():
|
1233
|
+
qtype(QT_8bit),
|
1234
|
+
rangestat(RS_minmax), rangestat_arg(0), d(0), bits(0), code_size(0)
|
1235
|
+
{}
|
1236
|
+
|
1237
|
+
void ScalarQuantizer::set_derived_sizes ()
|
1224
1238
|
{
|
1225
1239
|
switch (qtype) {
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1240
|
+
case QT_8bit:
|
1241
|
+
case QT_8bit_uniform:
|
1242
|
+
case QT_8bit_direct:
|
1229
1243
|
code_size = d;
|
1244
|
+
bits = 8;
|
1230
1245
|
break;
|
1231
|
-
|
1232
|
-
|
1246
|
+
case QT_4bit:
|
1247
|
+
case QT_4bit_uniform:
|
1233
1248
|
code_size = (d + 1) / 2;
|
1249
|
+
bits = 4;
|
1234
1250
|
break;
|
1235
|
-
|
1251
|
+
case QT_6bit:
|
1236
1252
|
code_size = (d * 6 + 7) / 8;
|
1253
|
+
bits = 6;
|
1237
1254
|
break;
|
1238
|
-
|
1255
|
+
case QT_fp16:
|
1239
1256
|
code_size = d * 2;
|
1257
|
+
bits = 16;
|
1240
1258
|
break;
|
1241
1259
|
}
|
1242
|
-
|
1243
1260
|
}
|
1244
1261
|
|
1245
|
-
ScalarQuantizer::ScalarQuantizer ():
|
1246
|
-
qtype(QT_8bit),
|
1247
|
-
rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
|
1248
|
-
{}
|
1249
|
-
|
1250
1262
|
void ScalarQuantizer::train (size_t n, const float *x)
|
1251
1263
|
{
|
1252
1264
|
int bit_per_dim =
|
@@ -1418,9 +1430,8 @@ struct IVFSQScannerIP: InvertedListScanner {
|
|
1418
1430
|
float accu = accu0 + dc.query_to_code (codes);
|
1419
1431
|
|
1420
1432
|
if (accu > simi [0]) {
|
1421
|
-
minheap_pop (k, simi, idxi);
|
1422
1433
|
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
1423
|
-
|
1434
|
+
minheap_replace_top (k, simi, idxi, accu, id);
|
1424
1435
|
nup++;
|
1425
1436
|
}
|
1426
1437
|
codes += code_size;
|
@@ -1506,9 +1517,8 @@ struct IVFSQScannerL2: InvertedListScanner {
|
|
1506
1517
|
float dis = dc.query_to_code (codes);
|
1507
1518
|
|
1508
1519
|
if (dis < simi [0]) {
|
1509
|
-
maxheap_pop (k, simi, idxi);
|
1510
1520
|
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
1511
|
-
|
1521
|
+
maxheap_replace_top (k, simi, idxi, dis, id);
|
1512
1522
|
nup++;
|
1513
1523
|
}
|
1514
1524
|
codes += code_size;
|
@@ -53,6 +53,9 @@ struct ScalarQuantizer {
|
|
53
53
|
/// dimension of input vectors
|
54
54
|
size_t d;
|
55
55
|
|
56
|
+
/// bits per scalar code
|
57
|
+
size_t bits;
|
58
|
+
|
56
59
|
/// bytes per vector
|
57
60
|
size_t code_size;
|
58
61
|
|
@@ -62,6 +65,9 @@ struct ScalarQuantizer {
|
|
62
65
|
ScalarQuantizer (size_t d, QuantizerType qtype);
|
63
66
|
ScalarQuantizer ();
|
64
67
|
|
68
|
+
/// updates internal values based on qtype and d
|
69
|
+
void set_derived_sizes ();
|
70
|
+
|
65
71
|
void train (size_t n, const float *x);
|
66
72
|
|
67
73
|
/// Used by an IVF index to train based on the residuals
|
@@ -15,15 +15,13 @@
|
|
15
15
|
#include <sys/types.h>
|
16
16
|
#include <sys/stat.h>
|
17
17
|
|
18
|
-
#ifndef _MSC_VER
|
19
|
-
#include <sys/mman.h>
|
20
|
-
#endif // !_MSC_VER
|
21
|
-
|
22
18
|
#include <faiss/impl/FaissAssert.h>
|
23
19
|
#include <faiss/impl/io.h>
|
24
20
|
#include <faiss/impl/io_macros.h>
|
25
21
|
#include <faiss/utils/hamming.h>
|
26
22
|
|
23
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
24
|
+
|
27
25
|
#include <faiss/IndexFlat.h>
|
28
26
|
#include <faiss/VectorTransform.h>
|
29
27
|
#include <faiss/IndexPreTransform.h>
|
@@ -39,17 +37,16 @@
|
|
39
37
|
#include <faiss/IndexScalarQuantizer.h>
|
40
38
|
#include <faiss/IndexHNSW.h>
|
41
39
|
#include <faiss/IndexLattice.h>
|
40
|
+
#include <faiss/IndexPQFastScan.h>
|
41
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
42
|
+
#include <faiss/IndexRefine.h>
|
43
|
+
|
42
44
|
#include <faiss/IndexBinaryFlat.h>
|
43
45
|
#include <faiss/IndexBinaryFromFloat.h>
|
44
46
|
#include <faiss/IndexBinaryHNSW.h>
|
45
47
|
#include <faiss/IndexBinaryIVF.h>
|
46
48
|
#include <faiss/IndexBinaryHash.h>
|
47
49
|
|
48
|
-
#ifndef _MSC_VER
|
49
|
-
#include <faiss/OnDiskInvertedLists.h>
|
50
|
-
#endif // !_MSC_VER
|
51
|
-
|
52
|
-
|
53
50
|
namespace faiss {
|
54
51
|
|
55
52
|
|
@@ -141,7 +138,10 @@ VectorTransform* read_VectorTransform (IOReader *f) {
|
|
141
138
|
}
|
142
139
|
vt = itqt;
|
143
140
|
} else {
|
144
|
-
|
141
|
+
FAISS_THROW_FMT(
|
142
|
+
"fourcc %ud (\"%s\") not recognized",
|
143
|
+
h, fourcc_inv_printable(h).c_str()
|
144
|
+
);
|
145
145
|
}
|
146
146
|
READ1 (vt->d_in);
|
147
147
|
READ1 (vt->d_out);
|
@@ -167,7 +167,10 @@ static void read_ArrayInvertedLists_sizes (
|
|
167
167
|
sizes[idsizes[j]] = idsizes[j + 1];
|
168
168
|
}
|
169
169
|
} else {
|
170
|
-
|
170
|
+
FAISS_THROW_FMT(
|
171
|
+
"list_type %ud (\"%s\") not recognized",
|
172
|
+
list_type, fourcc_inv_printable(list_type).c_str()
|
173
|
+
);
|
171
174
|
}
|
172
175
|
}
|
173
176
|
|
@@ -199,11 +202,6 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
199
202
|
}
|
200
203
|
return ails;
|
201
204
|
|
202
|
-
#ifdef _MSC_VER
|
203
|
-
} else {
|
204
|
-
FAISS_THROW_MSG("Unsupported inverted list format for Windows");
|
205
|
-
}
|
206
|
-
#else
|
207
205
|
} else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
208
206
|
// code is always ilxx where xx is specific to the type of invlists we want
|
209
207
|
// so we get the 16 high bits from the io_flag and the 16 low bits as "il"
|
@@ -218,7 +216,6 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
218
216
|
} else {
|
219
217
|
return InvertedListsIOHook::lookup(h)->read(f, io_flags);
|
220
218
|
}
|
221
|
-
#endif // !_MSC_VER
|
222
219
|
|
223
220
|
}
|
224
221
|
|
@@ -226,8 +223,11 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
226
223
|
static void read_InvertedLists (
|
227
224
|
IndexIVF *ivf, IOReader *f, int io_flags) {
|
228
225
|
InvertedLists *ils = read_InvertedLists (f, io_flags);
|
229
|
-
|
230
|
-
|
226
|
+
if (ils) {
|
227
|
+
FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
|
228
|
+
FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
|
229
|
+
ils->code_size == ivf->code_size);
|
230
|
+
}
|
231
231
|
ivf->invlists = ils;
|
232
232
|
ivf->own_invlists = true;
|
233
233
|
}
|
@@ -247,6 +247,7 @@ static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
|
|
247
247
|
READ1 (ivsc->d);
|
248
248
|
READ1 (ivsc->code_size);
|
249
249
|
READVECTOR (ivsc->trained);
|
250
|
+
ivsc->set_derived_sizes ();
|
250
251
|
}
|
251
252
|
|
252
253
|
|
@@ -551,14 +552,20 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
551
552
|
read_ProductQuantizer (&imiq->pq, f);
|
552
553
|
idx = imiq;
|
553
554
|
} else if(h == fourcc ("IxRF")) {
|
554
|
-
|
555
|
+
IndexRefine *idxrf = new IndexRefine ();
|
555
556
|
read_index_header (idxrf, f);
|
556
557
|
idxrf->base_index = read_index(f, io_flags);
|
557
|
-
idxrf->
|
558
|
-
IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f, io_flags));
|
559
|
-
std::swap (*rf, idxrf->refine_index);
|
560
|
-
delete rf;
|
558
|
+
idxrf->refine_index = read_index(f, io_flags);
|
561
559
|
READ1 (idxrf->k_factor);
|
560
|
+
if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
|
561
|
+
// then make a RefineFlat with it
|
562
|
+
IndexRefine *idxrf_old = idxrf;
|
563
|
+
idxrf = new IndexRefineFlat();
|
564
|
+
*idxrf = *idxrf_old;
|
565
|
+
delete idxrf_old;
|
566
|
+
}
|
567
|
+
idxrf->own_fields = true;
|
568
|
+
idxrf->own_refine_index = true;
|
562
569
|
idx = idxrf;
|
563
570
|
} else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
|
564
571
|
bool is_map2 = h == fourcc ("IxM2");
|
@@ -598,8 +605,36 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
598
605
|
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
|
599
606
|
}
|
600
607
|
idx = idxhnsw;
|
608
|
+
} else if(h == fourcc("IPfs")) {
|
609
|
+
IndexPQFastScan *idxpqfs = new IndexPQFastScan();
|
610
|
+
read_index_header (idxpqfs, f);
|
611
|
+
read_ProductQuantizer (&idxpqfs->pq, f);
|
612
|
+
READ1 (idxpqfs->implem);
|
613
|
+
READ1 (idxpqfs->bbs);
|
614
|
+
READ1 (idxpqfs->qbs);
|
615
|
+
READ1 (idxpqfs->ntotal2);
|
616
|
+
READ1 (idxpqfs->M2);
|
617
|
+
READVECTOR (idxpqfs->codes);
|
618
|
+
idx = idxpqfs;
|
619
|
+
|
620
|
+
} else if (h == fourcc("IwPf")) {
|
621
|
+
IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
|
622
|
+
read_ivf_header (ivpq, f);
|
623
|
+
READ1 (ivpq->by_residual);
|
624
|
+
READ1 (ivpq->code_size);
|
625
|
+
READ1 (ivpq->bbs);
|
626
|
+
READ1 (ivpq->M2);
|
627
|
+
READ1 (ivpq->implem);
|
628
|
+
READ1 (ivpq->qbs2);
|
629
|
+
read_ProductQuantizer (&ivpq->pq, f);
|
630
|
+
read_InvertedLists (ivpq, f, io_flags);
|
631
|
+
ivpq->precompute_table();
|
632
|
+
idx = ivpq;
|
601
633
|
} else {
|
602
|
-
FAISS_THROW_FMT(
|
634
|
+
FAISS_THROW_FMT(
|
635
|
+
"Index type 0x%08x (\"%s\") not recognized",
|
636
|
+
h, fourcc_inv_printable(h).c_str()
|
637
|
+
);
|
603
638
|
idx = nullptr;
|
604
639
|
}
|
605
640
|
return idx;
|
@@ -780,7 +815,10 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
780
815
|
}
|
781
816
|
idx = idxmh;
|
782
817
|
} else {
|
783
|
-
FAISS_THROW_FMT(
|
818
|
+
FAISS_THROW_FMT(
|
819
|
+
"Index type %08x (\"%s\") not recognized",
|
820
|
+
h, fourcc_inv_printable(h).c_str()
|
821
|
+
);
|
784
822
|
idx = nullptr;
|
785
823
|
}
|
786
824
|
return idx;
|
@@ -797,76 +835,6 @@ IndexBinary *read_index_binary (const char *fname, int io_flags) {
|
|
797
835
|
return idx;
|
798
836
|
}
|
799
837
|
|
800
|
-
#ifndef _MSC_VER
|
801
|
-
|
802
|
-
/**********************************************************
|
803
|
-
* InvertedListIOHook's
|
804
|
-
**********************************************************/
|
805
|
-
|
806
|
-
InvertedListsIOHook::InvertedListsIOHook(
|
807
|
-
const std::string & key, const std::string & classname):
|
808
|
-
key(key), classname(classname)
|
809
|
-
{}
|
810
|
-
|
811
|
-
namespace {
|
812
|
-
|
813
|
-
/// std::vector that deletes its contents
|
814
|
-
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
815
|
-
|
816
|
-
IOHookTable() {
|
817
|
-
push_back(new OnDiskInvertedListsIOHook());
|
818
|
-
}
|
819
|
-
|
820
|
-
~IOHookTable() {
|
821
|
-
for (auto x: *this) {
|
822
|
-
delete x;
|
823
|
-
}
|
824
|
-
}
|
825
|
-
};
|
826
|
-
|
827
|
-
static IOHookTable InvertedListsIOHook_table;
|
828
|
-
|
829
|
-
} // anonymous namepsace
|
830
|
-
|
831
|
-
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
832
|
-
{
|
833
|
-
for(const auto & callback: InvertedListsIOHook_table) {
|
834
|
-
if (h == fourcc(callback->key)) {
|
835
|
-
return callback;
|
836
|
-
}
|
837
|
-
}
|
838
|
-
FAISS_THROW_FMT ("read_InvertedLists: could not load ArrayInvertedLists as %04x", h);
|
839
|
-
}
|
840
|
-
|
841
|
-
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
|
842
|
-
{
|
843
|
-
for(const auto & callback: InvertedListsIOHook_table) {
|
844
|
-
if (callback->classname == classname) {
|
845
|
-
return callback;
|
846
|
-
}
|
847
|
-
}
|
848
|
-
FAISS_THROW_FMT ("read_InvertedLists: could not find classname %s", classname.c_str());
|
849
|
-
}
|
850
|
-
|
851
|
-
void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
|
852
|
-
{
|
853
|
-
InvertedListsIOHook_table.push_back(cb);
|
854
|
-
}
|
855
|
-
|
856
|
-
void InvertedListsIOHook::print_callbacks()
|
857
|
-
{
|
858
|
-
printf("registered %zd InvertedListsIOHooks:\n",
|
859
|
-
InvertedListsIOHook_table.size());
|
860
|
-
for(const auto & cb: InvertedListsIOHook_table) {
|
861
|
-
printf("%08x %s %s\n",
|
862
|
-
fourcc(cb->key.c_str()),
|
863
|
-
cb->key.c_str(),
|
864
|
-
cb->classname.c_str());
|
865
|
-
}
|
866
|
-
}
|
867
|
-
|
868
|
-
#endif // !_MSC_VER
|
869
|
-
|
870
838
|
|
871
839
|
|
872
840
|
} // namespace faiss
|
@@ -15,9 +15,7 @@
|
|
15
15
|
#include <sys/types.h>
|
16
16
|
#include <sys/stat.h>
|
17
17
|
|
18
|
-
#
|
19
|
-
#include <sys/mman.h>
|
20
|
-
#endif // !_MSC_VER
|
18
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
21
19
|
|
22
20
|
#include <faiss/impl/FaissAssert.h>
|
23
21
|
#include <faiss/impl/io.h>
|
@@ -39,6 +37,9 @@
|
|
39
37
|
#include <faiss/IndexScalarQuantizer.h>
|
40
38
|
#include <faiss/IndexHNSW.h>
|
41
39
|
#include <faiss/IndexLattice.h>
|
40
|
+
#include <faiss/IndexPQFastScan.h>
|
41
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
42
|
+
#include <faiss/IndexRefine.h>
|
42
43
|
|
43
44
|
#include <faiss/IndexBinaryFlat.h>
|
44
45
|
#include <faiss/IndexBinaryFromFloat.h>
|
@@ -46,11 +47,6 @@
|
|
46
47
|
#include <faiss/IndexBinaryIVF.h>
|
47
48
|
#include <faiss/IndexBinaryHash.h>
|
48
49
|
|
49
|
-
#ifndef _MSC_VER
|
50
|
-
#include <faiss/OnDiskInvertedLists.h>
|
51
|
-
#endif // !_MSC_VER
|
52
|
-
|
53
|
-
|
54
50
|
/*************************************************************
|
55
51
|
* The I/O format is the content of the class. For objects that are
|
56
52
|
* inherited, like Index, a 4-character-code (fourcc) indicates which
|
@@ -66,9 +62,6 @@
|
|
66
62
|
* or deprecated fields), the fourcc can be replaced. New code should
|
67
63
|
* be able to read the old fourcc and fill in new classes.
|
68
64
|
*
|
69
|
-
* TODO: serialization to strings for use in Python pickle or Torch
|
70
|
-
* serialization.
|
71
|
-
*
|
72
65
|
* TODO: in this file, the read functions that encouter errors may
|
73
66
|
* leak memory.
|
74
67
|
**************************************************************/
|
@@ -216,19 +209,10 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
|
|
216
209
|
WRITEANDCHECK (ails->ids[i].data(), n);
|
217
210
|
}
|
218
211
|
}
|
219
|
-
#ifndef _MSC_VER
|
220
|
-
} else {
|
221
212
|
|
213
|
+
} else {
|
222
214
|
InvertedListsIOHook::lookup_classname(
|
223
215
|
typeid(*ils).name())->write(ils, f);
|
224
|
-
|
225
|
-
/*
|
226
|
-
fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
|
227
|
-
"saving null invlist\n");
|
228
|
-
uint32_t h = fourcc ("il00");
|
229
|
-
WRITE1 (h);
|
230
|
-
*/
|
231
|
-
#endif // !_MSC_VER
|
232
216
|
}
|
233
217
|
}
|
234
218
|
|
@@ -409,13 +393,13 @@ void write_index (const Index *idx, IOWriter *f) {
|
|
409
393
|
WRITE1 (h);
|
410
394
|
write_index_header (imiq, f);
|
411
395
|
write_ProductQuantizer (&imiq->pq, f);
|
412
|
-
} else if(const
|
413
|
-
dynamic_cast<const
|
396
|
+
} else if(const IndexRefine * idxrf =
|
397
|
+
dynamic_cast<const IndexRefine *> (idx)) {
|
414
398
|
uint32_t h = fourcc ("IxRF");
|
415
399
|
WRITE1 (h);
|
416
400
|
write_index_header (idxrf, f);
|
417
401
|
write_index (idxrf->base_index, f);
|
418
|
-
write_index (
|
402
|
+
write_index (idxrf->refine_index, f);
|
419
403
|
WRITE1 (idxrf->k_factor);
|
420
404
|
} else if(const IndexIDMap * idxmap =
|
421
405
|
dynamic_cast<const IndexIDMap *> (idx)) {
|
@@ -440,8 +424,33 @@ void write_index (const Index *idx, IOWriter *f) {
|
|
440
424
|
write_index_header (idxhnsw, f);
|
441
425
|
write_HNSW (&idxhnsw->hnsw, f);
|
442
426
|
write_index (idxhnsw->storage, f);
|
427
|
+
} else if (const IndexPQFastScan *idxpqfs =
|
428
|
+
dynamic_cast<const IndexPQFastScan*>(idx)) {
|
429
|
+
uint32_t h = fourcc("IPfs");
|
430
|
+
WRITE1 (h);
|
431
|
+
write_index_header (idxpqfs, f);
|
432
|
+
write_ProductQuantizer (&idxpqfs->pq, f);
|
433
|
+
WRITE1 (idxpqfs->implem);
|
434
|
+
WRITE1 (idxpqfs->bbs);
|
435
|
+
WRITE1 (idxpqfs->qbs);
|
436
|
+
WRITE1 (idxpqfs->ntotal2);
|
437
|
+
WRITE1 (idxpqfs->M2);
|
438
|
+
WRITEVECTOR (idxpqfs->codes);
|
439
|
+
} else if (const IndexIVFPQFastScan * ivpq =
|
440
|
+
dynamic_cast<const IndexIVFPQFastScan *> (idx)) {
|
441
|
+
uint32_t h = fourcc ("IwPf");
|
442
|
+
WRITE1 (h);
|
443
|
+
write_ivf_header (ivpq, f);
|
444
|
+
WRITE1 (ivpq->by_residual);
|
445
|
+
WRITE1 (ivpq->code_size);
|
446
|
+
WRITE1 (ivpq->bbs);
|
447
|
+
WRITE1 (ivpq->M2);
|
448
|
+
WRITE1 (ivpq->implem);
|
449
|
+
WRITE1 (ivpq->qbs2);
|
450
|
+
write_ProductQuantizer (&ivpq->pq, f);
|
451
|
+
write_InvertedLists (ivpq->invlists, f);
|
443
452
|
} else {
|
444
|
-
|
453
|
+
FAISS_THROW_MSG ("don't know how to serialize this type of index");
|
445
454
|
}
|
446
455
|
}
|
447
456
|
|