faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -39,8 +39,12 @@ namespace faiss {
|
|
39
39
|
* that hides the template mess.
|
40
40
|
********************************************************************/
|
41
41
|
|
42
|
-
#
|
42
|
+
#ifdef __AVX2__
|
43
|
+
#ifdef __F16C__
|
43
44
|
#define USE_F16C
|
45
|
+
#else
|
46
|
+
#warning "Cannot enable AVX optimizations in scalar quantizer if -mf16c is not set as well"
|
47
|
+
#endif
|
44
48
|
#endif
|
45
49
|
|
46
50
|
|
@@ -1220,33 +1224,41 @@ SQDistanceComputer *select_distance_computer (
|
|
1220
1224
|
|
1221
1225
|
ScalarQuantizer::ScalarQuantizer
|
1222
1226
|
(size_t d, QuantizerType qtype):
|
1223
|
-
qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d
|
1227
|
+
qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d(d)
|
1228
|
+
{
|
1229
|
+
set_derived_sizes();
|
1230
|
+
}
|
1231
|
+
|
1232
|
+
ScalarQuantizer::ScalarQuantizer ():
|
1233
|
+
qtype(QT_8bit),
|
1234
|
+
rangestat(RS_minmax), rangestat_arg(0), d(0), bits(0), code_size(0)
|
1235
|
+
{}
|
1236
|
+
|
1237
|
+
void ScalarQuantizer::set_derived_sizes ()
|
1224
1238
|
{
|
1225
1239
|
switch (qtype) {
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1240
|
+
case QT_8bit:
|
1241
|
+
case QT_8bit_uniform:
|
1242
|
+
case QT_8bit_direct:
|
1229
1243
|
code_size = d;
|
1244
|
+
bits = 8;
|
1230
1245
|
break;
|
1231
|
-
|
1232
|
-
|
1246
|
+
case QT_4bit:
|
1247
|
+
case QT_4bit_uniform:
|
1233
1248
|
code_size = (d + 1) / 2;
|
1249
|
+
bits = 4;
|
1234
1250
|
break;
|
1235
|
-
|
1251
|
+
case QT_6bit:
|
1236
1252
|
code_size = (d * 6 + 7) / 8;
|
1253
|
+
bits = 6;
|
1237
1254
|
break;
|
1238
|
-
|
1255
|
+
case QT_fp16:
|
1239
1256
|
code_size = d * 2;
|
1257
|
+
bits = 16;
|
1240
1258
|
break;
|
1241
1259
|
}
|
1242
|
-
|
1243
1260
|
}
|
1244
1261
|
|
1245
|
-
ScalarQuantizer::ScalarQuantizer ():
|
1246
|
-
qtype(QT_8bit),
|
1247
|
-
rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
|
1248
|
-
{}
|
1249
|
-
|
1250
1262
|
void ScalarQuantizer::train (size_t n, const float *x)
|
1251
1263
|
{
|
1252
1264
|
int bit_per_dim =
|
@@ -1418,9 +1430,8 @@ struct IVFSQScannerIP: InvertedListScanner {
|
|
1418
1430
|
float accu = accu0 + dc.query_to_code (codes);
|
1419
1431
|
|
1420
1432
|
if (accu > simi [0]) {
|
1421
|
-
minheap_pop (k, simi, idxi);
|
1422
1433
|
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
1423
|
-
|
1434
|
+
minheap_replace_top (k, simi, idxi, accu, id);
|
1424
1435
|
nup++;
|
1425
1436
|
}
|
1426
1437
|
codes += code_size;
|
@@ -1506,9 +1517,8 @@ struct IVFSQScannerL2: InvertedListScanner {
|
|
1506
1517
|
float dis = dc.query_to_code (codes);
|
1507
1518
|
|
1508
1519
|
if (dis < simi [0]) {
|
1509
|
-
maxheap_pop (k, simi, idxi);
|
1510
1520
|
int64_t id = store_pairs ? (list_no << 32 | j) : ids[j];
|
1511
|
-
|
1521
|
+
maxheap_replace_top (k, simi, idxi, dis, id);
|
1512
1522
|
nup++;
|
1513
1523
|
}
|
1514
1524
|
codes += code_size;
|
@@ -53,6 +53,9 @@ struct ScalarQuantizer {
|
|
53
53
|
/// dimension of input vectors
|
54
54
|
size_t d;
|
55
55
|
|
56
|
+
/// bits per scalar code
|
57
|
+
size_t bits;
|
58
|
+
|
56
59
|
/// bytes per vector
|
57
60
|
size_t code_size;
|
58
61
|
|
@@ -62,6 +65,9 @@ struct ScalarQuantizer {
|
|
62
65
|
ScalarQuantizer (size_t d, QuantizerType qtype);
|
63
66
|
ScalarQuantizer ();
|
64
67
|
|
68
|
+
/// updates internal values based on qtype and d
|
69
|
+
void set_derived_sizes ();
|
70
|
+
|
65
71
|
void train (size_t n, const float *x);
|
66
72
|
|
67
73
|
/// Used by an IVF index to train based on the residuals
|
@@ -15,15 +15,13 @@
|
|
15
15
|
#include <sys/types.h>
|
16
16
|
#include <sys/stat.h>
|
17
17
|
|
18
|
-
#ifndef _MSC_VER
|
19
|
-
#include <sys/mman.h>
|
20
|
-
#endif // !_MSC_VER
|
21
|
-
|
22
18
|
#include <faiss/impl/FaissAssert.h>
|
23
19
|
#include <faiss/impl/io.h>
|
24
20
|
#include <faiss/impl/io_macros.h>
|
25
21
|
#include <faiss/utils/hamming.h>
|
26
22
|
|
23
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
24
|
+
|
27
25
|
#include <faiss/IndexFlat.h>
|
28
26
|
#include <faiss/VectorTransform.h>
|
29
27
|
#include <faiss/IndexPreTransform.h>
|
@@ -39,17 +37,16 @@
|
|
39
37
|
#include <faiss/IndexScalarQuantizer.h>
|
40
38
|
#include <faiss/IndexHNSW.h>
|
41
39
|
#include <faiss/IndexLattice.h>
|
40
|
+
#include <faiss/IndexPQFastScan.h>
|
41
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
42
|
+
#include <faiss/IndexRefine.h>
|
43
|
+
|
42
44
|
#include <faiss/IndexBinaryFlat.h>
|
43
45
|
#include <faiss/IndexBinaryFromFloat.h>
|
44
46
|
#include <faiss/IndexBinaryHNSW.h>
|
45
47
|
#include <faiss/IndexBinaryIVF.h>
|
46
48
|
#include <faiss/IndexBinaryHash.h>
|
47
49
|
|
48
|
-
#ifndef _MSC_VER
|
49
|
-
#include <faiss/OnDiskInvertedLists.h>
|
50
|
-
#endif // !_MSC_VER
|
51
|
-
|
52
|
-
|
53
50
|
namespace faiss {
|
54
51
|
|
55
52
|
|
@@ -141,7 +138,10 @@ VectorTransform* read_VectorTransform (IOReader *f) {
|
|
141
138
|
}
|
142
139
|
vt = itqt;
|
143
140
|
} else {
|
144
|
-
|
141
|
+
FAISS_THROW_FMT(
|
142
|
+
"fourcc %ud (\"%s\") not recognized",
|
143
|
+
h, fourcc_inv_printable(h).c_str()
|
144
|
+
);
|
145
145
|
}
|
146
146
|
READ1 (vt->d_in);
|
147
147
|
READ1 (vt->d_out);
|
@@ -167,7 +167,10 @@ static void read_ArrayInvertedLists_sizes (
|
|
167
167
|
sizes[idsizes[j]] = idsizes[j + 1];
|
168
168
|
}
|
169
169
|
} else {
|
170
|
-
|
170
|
+
FAISS_THROW_FMT(
|
171
|
+
"list_type %ud (\"%s\") not recognized",
|
172
|
+
list_type, fourcc_inv_printable(list_type).c_str()
|
173
|
+
);
|
171
174
|
}
|
172
175
|
}
|
173
176
|
|
@@ -199,11 +202,6 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
199
202
|
}
|
200
203
|
return ails;
|
201
204
|
|
202
|
-
#ifdef _MSC_VER
|
203
|
-
} else {
|
204
|
-
FAISS_THROW_MSG("Unsupported inverted list format for Windows");
|
205
|
-
}
|
206
|
-
#else
|
207
205
|
} else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
208
206
|
// code is always ilxx where xx is specific to the type of invlists we want
|
209
207
|
// so we get the 16 high bits from the io_flag and the 16 low bits as "il"
|
@@ -218,7 +216,6 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
218
216
|
} else {
|
219
217
|
return InvertedListsIOHook::lookup(h)->read(f, io_flags);
|
220
218
|
}
|
221
|
-
#endif // !_MSC_VER
|
222
219
|
|
223
220
|
}
|
224
221
|
|
@@ -226,8 +223,11 @@ InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
|
|
226
223
|
static void read_InvertedLists (
|
227
224
|
IndexIVF *ivf, IOReader *f, int io_flags) {
|
228
225
|
InvertedLists *ils = read_InvertedLists (f, io_flags);
|
229
|
-
|
230
|
-
|
226
|
+
if (ils) {
|
227
|
+
FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist);
|
228
|
+
FAISS_THROW_IF_NOT (ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
|
229
|
+
ils->code_size == ivf->code_size);
|
230
|
+
}
|
231
231
|
ivf->invlists = ils;
|
232
232
|
ivf->own_invlists = true;
|
233
233
|
}
|
@@ -247,6 +247,7 @@ static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
|
|
247
247
|
READ1 (ivsc->d);
|
248
248
|
READ1 (ivsc->code_size);
|
249
249
|
READVECTOR (ivsc->trained);
|
250
|
+
ivsc->set_derived_sizes ();
|
250
251
|
}
|
251
252
|
|
252
253
|
|
@@ -551,14 +552,20 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
551
552
|
read_ProductQuantizer (&imiq->pq, f);
|
552
553
|
idx = imiq;
|
553
554
|
} else if(h == fourcc ("IxRF")) {
|
554
|
-
|
555
|
+
IndexRefine *idxrf = new IndexRefine ();
|
555
556
|
read_index_header (idxrf, f);
|
556
557
|
idxrf->base_index = read_index(f, io_flags);
|
557
|
-
idxrf->
|
558
|
-
IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f, io_flags));
|
559
|
-
std::swap (*rf, idxrf->refine_index);
|
560
|
-
delete rf;
|
558
|
+
idxrf->refine_index = read_index(f, io_flags);
|
561
559
|
READ1 (idxrf->k_factor);
|
560
|
+
if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
|
561
|
+
// then make a RefineFlat with it
|
562
|
+
IndexRefine *idxrf_old = idxrf;
|
563
|
+
idxrf = new IndexRefineFlat();
|
564
|
+
*idxrf = *idxrf_old;
|
565
|
+
delete idxrf_old;
|
566
|
+
}
|
567
|
+
idxrf->own_fields = true;
|
568
|
+
idxrf->own_refine_index = true;
|
562
569
|
idx = idxrf;
|
563
570
|
} else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
|
564
571
|
bool is_map2 = h == fourcc ("IxM2");
|
@@ -598,8 +605,36 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
598
605
|
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
|
599
606
|
}
|
600
607
|
idx = idxhnsw;
|
608
|
+
} else if(h == fourcc("IPfs")) {
|
609
|
+
IndexPQFastScan *idxpqfs = new IndexPQFastScan();
|
610
|
+
read_index_header (idxpqfs, f);
|
611
|
+
read_ProductQuantizer (&idxpqfs->pq, f);
|
612
|
+
READ1 (idxpqfs->implem);
|
613
|
+
READ1 (idxpqfs->bbs);
|
614
|
+
READ1 (idxpqfs->qbs);
|
615
|
+
READ1 (idxpqfs->ntotal2);
|
616
|
+
READ1 (idxpqfs->M2);
|
617
|
+
READVECTOR (idxpqfs->codes);
|
618
|
+
idx = idxpqfs;
|
619
|
+
|
620
|
+
} else if (h == fourcc("IwPf")) {
|
621
|
+
IndexIVFPQFastScan *ivpq = new IndexIVFPQFastScan();
|
622
|
+
read_ivf_header (ivpq, f);
|
623
|
+
READ1 (ivpq->by_residual);
|
624
|
+
READ1 (ivpq->code_size);
|
625
|
+
READ1 (ivpq->bbs);
|
626
|
+
READ1 (ivpq->M2);
|
627
|
+
READ1 (ivpq->implem);
|
628
|
+
READ1 (ivpq->qbs2);
|
629
|
+
read_ProductQuantizer (&ivpq->pq, f);
|
630
|
+
read_InvertedLists (ivpq, f, io_flags);
|
631
|
+
ivpq->precompute_table();
|
632
|
+
idx = ivpq;
|
601
633
|
} else {
|
602
|
-
FAISS_THROW_FMT(
|
634
|
+
FAISS_THROW_FMT(
|
635
|
+
"Index type 0x%08x (\"%s\") not recognized",
|
636
|
+
h, fourcc_inv_printable(h).c_str()
|
637
|
+
);
|
603
638
|
idx = nullptr;
|
604
639
|
}
|
605
640
|
return idx;
|
@@ -780,7 +815,10 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
780
815
|
}
|
781
816
|
idx = idxmh;
|
782
817
|
} else {
|
783
|
-
FAISS_THROW_FMT(
|
818
|
+
FAISS_THROW_FMT(
|
819
|
+
"Index type %08x (\"%s\") not recognized",
|
820
|
+
h, fourcc_inv_printable(h).c_str()
|
821
|
+
);
|
784
822
|
idx = nullptr;
|
785
823
|
}
|
786
824
|
return idx;
|
@@ -797,76 +835,6 @@ IndexBinary *read_index_binary (const char *fname, int io_flags) {
|
|
797
835
|
return idx;
|
798
836
|
}
|
799
837
|
|
800
|
-
#ifndef _MSC_VER
|
801
|
-
|
802
|
-
/**********************************************************
|
803
|
-
* InvertedListIOHook's
|
804
|
-
**********************************************************/
|
805
|
-
|
806
|
-
InvertedListsIOHook::InvertedListsIOHook(
|
807
|
-
const std::string & key, const std::string & classname):
|
808
|
-
key(key), classname(classname)
|
809
|
-
{}
|
810
|
-
|
811
|
-
namespace {
|
812
|
-
|
813
|
-
/// std::vector that deletes its contents
|
814
|
-
struct IOHookTable: std::vector<InvertedListsIOHook*> {
|
815
|
-
|
816
|
-
IOHookTable() {
|
817
|
-
push_back(new OnDiskInvertedListsIOHook());
|
818
|
-
}
|
819
|
-
|
820
|
-
~IOHookTable() {
|
821
|
-
for (auto x: *this) {
|
822
|
-
delete x;
|
823
|
-
}
|
824
|
-
}
|
825
|
-
};
|
826
|
-
|
827
|
-
static IOHookTable InvertedListsIOHook_table;
|
828
|
-
|
829
|
-
} // anonymous namepsace
|
830
|
-
|
831
|
-
InvertedListsIOHook* InvertedListsIOHook::lookup(int h)
|
832
|
-
{
|
833
|
-
for(const auto & callback: InvertedListsIOHook_table) {
|
834
|
-
if (h == fourcc(callback->key)) {
|
835
|
-
return callback;
|
836
|
-
}
|
837
|
-
}
|
838
|
-
FAISS_THROW_FMT ("read_InvertedLists: could not load ArrayInvertedLists as %04x", h);
|
839
|
-
}
|
840
|
-
|
841
|
-
InvertedListsIOHook* InvertedListsIOHook::lookup_classname(const std::string & classname)
|
842
|
-
{
|
843
|
-
for(const auto & callback: InvertedListsIOHook_table) {
|
844
|
-
if (callback->classname == classname) {
|
845
|
-
return callback;
|
846
|
-
}
|
847
|
-
}
|
848
|
-
FAISS_THROW_FMT ("read_InvertedLists: could not find classname %s", classname.c_str());
|
849
|
-
}
|
850
|
-
|
851
|
-
void InvertedListsIOHook::add_callback(InvertedListsIOHook *cb)
|
852
|
-
{
|
853
|
-
InvertedListsIOHook_table.push_back(cb);
|
854
|
-
}
|
855
|
-
|
856
|
-
void InvertedListsIOHook::print_callbacks()
|
857
|
-
{
|
858
|
-
printf("registered %zd InvertedListsIOHooks:\n",
|
859
|
-
InvertedListsIOHook_table.size());
|
860
|
-
for(const auto & cb: InvertedListsIOHook_table) {
|
861
|
-
printf("%08x %s %s\n",
|
862
|
-
fourcc(cb->key.c_str()),
|
863
|
-
cb->key.c_str(),
|
864
|
-
cb->classname.c_str());
|
865
|
-
}
|
866
|
-
}
|
867
|
-
|
868
|
-
#endif // !_MSC_VER
|
869
|
-
|
870
838
|
|
871
839
|
|
872
840
|
} // namespace faiss
|
@@ -15,9 +15,7 @@
|
|
15
15
|
#include <sys/types.h>
|
16
16
|
#include <sys/stat.h>
|
17
17
|
|
18
|
-
#
|
19
|
-
#include <sys/mman.h>
|
20
|
-
#endif // !_MSC_VER
|
18
|
+
#include <faiss/invlists/InvertedListsIOHook.h>
|
21
19
|
|
22
20
|
#include <faiss/impl/FaissAssert.h>
|
23
21
|
#include <faiss/impl/io.h>
|
@@ -39,6 +37,9 @@
|
|
39
37
|
#include <faiss/IndexScalarQuantizer.h>
|
40
38
|
#include <faiss/IndexHNSW.h>
|
41
39
|
#include <faiss/IndexLattice.h>
|
40
|
+
#include <faiss/IndexPQFastScan.h>
|
41
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
42
|
+
#include <faiss/IndexRefine.h>
|
42
43
|
|
43
44
|
#include <faiss/IndexBinaryFlat.h>
|
44
45
|
#include <faiss/IndexBinaryFromFloat.h>
|
@@ -46,11 +47,6 @@
|
|
46
47
|
#include <faiss/IndexBinaryIVF.h>
|
47
48
|
#include <faiss/IndexBinaryHash.h>
|
48
49
|
|
49
|
-
#ifndef _MSC_VER
|
50
|
-
#include <faiss/OnDiskInvertedLists.h>
|
51
|
-
#endif // !_MSC_VER
|
52
|
-
|
53
|
-
|
54
50
|
/*************************************************************
|
55
51
|
* The I/O format is the content of the class. For objects that are
|
56
52
|
* inherited, like Index, a 4-character-code (fourcc) indicates which
|
@@ -66,9 +62,6 @@
|
|
66
62
|
* or deprecated fields), the fourcc can be replaced. New code should
|
67
63
|
* be able to read the old fourcc and fill in new classes.
|
68
64
|
*
|
69
|
-
* TODO: serialization to strings for use in Python pickle or Torch
|
70
|
-
* serialization.
|
71
|
-
*
|
72
65
|
* TODO: in this file, the read functions that encouter errors may
|
73
66
|
* leak memory.
|
74
67
|
**************************************************************/
|
@@ -216,19 +209,10 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
|
|
216
209
|
WRITEANDCHECK (ails->ids[i].data(), n);
|
217
210
|
}
|
218
211
|
}
|
219
|
-
#ifndef _MSC_VER
|
220
|
-
} else {
|
221
212
|
|
213
|
+
} else {
|
222
214
|
InvertedListsIOHook::lookup_classname(
|
223
215
|
typeid(*ils).name())->write(ils, f);
|
224
|
-
|
225
|
-
/*
|
226
|
-
fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
|
227
|
-
"saving null invlist\n");
|
228
|
-
uint32_t h = fourcc ("il00");
|
229
|
-
WRITE1 (h);
|
230
|
-
*/
|
231
|
-
#endif // !_MSC_VER
|
232
216
|
}
|
233
217
|
}
|
234
218
|
|
@@ -409,13 +393,13 @@ void write_index (const Index *idx, IOWriter *f) {
|
|
409
393
|
WRITE1 (h);
|
410
394
|
write_index_header (imiq, f);
|
411
395
|
write_ProductQuantizer (&imiq->pq, f);
|
412
|
-
} else if(const
|
413
|
-
dynamic_cast<const
|
396
|
+
} else if(const IndexRefine * idxrf =
|
397
|
+
dynamic_cast<const IndexRefine *> (idx)) {
|
414
398
|
uint32_t h = fourcc ("IxRF");
|
415
399
|
WRITE1 (h);
|
416
400
|
write_index_header (idxrf, f);
|
417
401
|
write_index (idxrf->base_index, f);
|
418
|
-
write_index (
|
402
|
+
write_index (idxrf->refine_index, f);
|
419
403
|
WRITE1 (idxrf->k_factor);
|
420
404
|
} else if(const IndexIDMap * idxmap =
|
421
405
|
dynamic_cast<const IndexIDMap *> (idx)) {
|
@@ -440,8 +424,33 @@ void write_index (const Index *idx, IOWriter *f) {
|
|
440
424
|
write_index_header (idxhnsw, f);
|
441
425
|
write_HNSW (&idxhnsw->hnsw, f);
|
442
426
|
write_index (idxhnsw->storage, f);
|
427
|
+
} else if (const IndexPQFastScan *idxpqfs =
|
428
|
+
dynamic_cast<const IndexPQFastScan*>(idx)) {
|
429
|
+
uint32_t h = fourcc("IPfs");
|
430
|
+
WRITE1 (h);
|
431
|
+
write_index_header (idxpqfs, f);
|
432
|
+
write_ProductQuantizer (&idxpqfs->pq, f);
|
433
|
+
WRITE1 (idxpqfs->implem);
|
434
|
+
WRITE1 (idxpqfs->bbs);
|
435
|
+
WRITE1 (idxpqfs->qbs);
|
436
|
+
WRITE1 (idxpqfs->ntotal2);
|
437
|
+
WRITE1 (idxpqfs->M2);
|
438
|
+
WRITEVECTOR (idxpqfs->codes);
|
439
|
+
} else if (const IndexIVFPQFastScan * ivpq =
|
440
|
+
dynamic_cast<const IndexIVFPQFastScan *> (idx)) {
|
441
|
+
uint32_t h = fourcc ("IwPf");
|
442
|
+
WRITE1 (h);
|
443
|
+
write_ivf_header (ivpq, f);
|
444
|
+
WRITE1 (ivpq->by_residual);
|
445
|
+
WRITE1 (ivpq->code_size);
|
446
|
+
WRITE1 (ivpq->bbs);
|
447
|
+
WRITE1 (ivpq->M2);
|
448
|
+
WRITE1 (ivpq->implem);
|
449
|
+
WRITE1 (ivpq->qbs2);
|
450
|
+
write_ProductQuantizer (&ivpq->pq, f);
|
451
|
+
write_InvertedLists (ivpq->invlists, f);
|
443
452
|
} else {
|
444
|
-
|
453
|
+
FAISS_THROW_MSG ("don't know how to serialize this type of index");
|
445
454
|
}
|
446
455
|
}
|
447
456
|
|