faiss 0.1.7 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/README.md +7 -7
- data/ext/faiss/ext.cpp +1 -1
- data/ext/faiss/extconf.rb +8 -2
- data/ext/faiss/index.cpp +102 -69
- data/ext/faiss/index_binary.cpp +24 -30
- data/ext/faiss/kmeans.cpp +20 -16
- data/ext/faiss/numo.hpp +867 -0
- data/ext/faiss/pca_matrix.cpp +13 -14
- data/ext/faiss/product_quantizer.cpp +23 -24
- data/ext/faiss/utils.cpp +10 -37
- data/ext/faiss/utils.h +2 -13
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +0 -5
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +26 -12
- data/lib/faiss/index.rb +0 -20
- data/lib/faiss/index_binary.rb +0 -20
- data/lib/faiss/kmeans.rb +0 -15
- data/lib/faiss/pca_matrix.rb +0 -15
- data/lib/faiss/product_quantizer.rb +0 -22
|
@@ -9,11 +9,13 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/index_io.h>
|
|
11
11
|
|
|
12
|
+
#include <faiss/impl/io_macros.h>
|
|
13
|
+
|
|
12
14
|
#include <cstdio>
|
|
13
15
|
#include <cstdlib>
|
|
14
16
|
|
|
15
|
-
#include <sys/types.h>
|
|
16
17
|
#include <sys/stat.h>
|
|
18
|
+
#include <sys/types.h>
|
|
17
19
|
|
|
18
20
|
#include <faiss/impl/FaissAssert.h>
|
|
19
21
|
#include <faiss/impl/io.h>
|
|
@@ -22,343 +24,383 @@
|
|
|
22
24
|
|
|
23
25
|
#include <faiss/invlists/InvertedListsIOHook.h>
|
|
24
26
|
|
|
27
|
+
#include <faiss/Index2Layer.h>
|
|
25
28
|
#include <faiss/IndexFlat.h>
|
|
26
|
-
#include <faiss/
|
|
27
|
-
#include <faiss/IndexPreTransform.h>
|
|
28
|
-
#include <faiss/IndexLSH.h>
|
|
29
|
-
#include <faiss/IndexPQ.h>
|
|
29
|
+
#include <faiss/IndexHNSW.h>
|
|
30
30
|
#include <faiss/IndexIVF.h>
|
|
31
|
+
#include <faiss/IndexIVFFlat.h>
|
|
31
32
|
#include <faiss/IndexIVFPQ.h>
|
|
33
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
|
32
34
|
#include <faiss/IndexIVFPQR.h>
|
|
33
|
-
#include <faiss/Index2Layer.h>
|
|
34
|
-
#include <faiss/IndexIVFFlat.h>
|
|
35
35
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
36
|
-
#include <faiss/
|
|
37
|
-
#include <faiss/IndexScalarQuantizer.h>
|
|
38
|
-
#include <faiss/IndexHNSW.h>
|
|
36
|
+
#include <faiss/IndexLSH.h>
|
|
39
37
|
#include <faiss/IndexLattice.h>
|
|
38
|
+
#include <faiss/IndexNSG.h>
|
|
39
|
+
#include <faiss/IndexPQ.h>
|
|
40
40
|
#include <faiss/IndexPQFastScan.h>
|
|
41
|
-
#include <faiss/
|
|
41
|
+
#include <faiss/IndexPreTransform.h>
|
|
42
42
|
#include <faiss/IndexRefine.h>
|
|
43
|
+
#include <faiss/IndexResidual.h>
|
|
44
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
45
|
+
#include <faiss/MetaIndexes.h>
|
|
46
|
+
#include <faiss/VectorTransform.h>
|
|
43
47
|
|
|
44
48
|
#include <faiss/IndexBinaryFlat.h>
|
|
45
49
|
#include <faiss/IndexBinaryFromFloat.h>
|
|
46
50
|
#include <faiss/IndexBinaryHNSW.h>
|
|
47
|
-
#include <faiss/IndexBinaryIVF.h>
|
|
48
51
|
#include <faiss/IndexBinaryHash.h>
|
|
52
|
+
#include <faiss/IndexBinaryIVF.h>
|
|
49
53
|
|
|
50
54
|
namespace faiss {
|
|
51
55
|
|
|
52
|
-
|
|
53
56
|
/*************************************************************
|
|
54
57
|
* Read
|
|
55
58
|
**************************************************************/
|
|
56
59
|
|
|
57
|
-
static void read_index_header
|
|
58
|
-
READ1
|
|
59
|
-
READ1
|
|
60
|
+
static void read_index_header(Index* idx, IOReader* f) {
|
|
61
|
+
READ1(idx->d);
|
|
62
|
+
READ1(idx->ntotal);
|
|
60
63
|
Index::idx_t dummy;
|
|
61
|
-
READ1
|
|
62
|
-
READ1
|
|
63
|
-
READ1
|
|
64
|
-
READ1
|
|
64
|
+
READ1(dummy);
|
|
65
|
+
READ1(dummy);
|
|
66
|
+
READ1(idx->is_trained);
|
|
67
|
+
READ1(idx->metric_type);
|
|
65
68
|
if (idx->metric_type > 1) {
|
|
66
|
-
READ1
|
|
69
|
+
READ1(idx->metric_arg);
|
|
67
70
|
}
|
|
68
71
|
idx->verbose = false;
|
|
69
72
|
}
|
|
70
73
|
|
|
71
|
-
VectorTransform* read_VectorTransform
|
|
74
|
+
VectorTransform* read_VectorTransform(IOReader* f) {
|
|
72
75
|
uint32_t h;
|
|
73
|
-
READ1
|
|
74
|
-
VectorTransform
|
|
75
|
-
|
|
76
|
-
if (h == fourcc
|
|
77
|
-
h == fourcc
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
READVECTOR
|
|
90
|
-
READVECTOR (pca->eigenvalues);
|
|
91
|
-
READVECTOR (pca->PCAMat);
|
|
76
|
+
READ1(h);
|
|
77
|
+
VectorTransform* vt = nullptr;
|
|
78
|
+
|
|
79
|
+
if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
|
|
80
|
+
h == fourcc("PcAm") || h == fourcc("Viqm")) {
|
|
81
|
+
LinearTransform* lt = nullptr;
|
|
82
|
+
if (h == fourcc("rrot")) {
|
|
83
|
+
lt = new RandomRotationMatrix();
|
|
84
|
+
} else if (h == fourcc("PCAm") || h == fourcc("PcAm")) {
|
|
85
|
+
PCAMatrix* pca = new PCAMatrix();
|
|
86
|
+
READ1(pca->eigen_power);
|
|
87
|
+
READ1(pca->random_rotation);
|
|
88
|
+
if (h == fourcc("PcAm"))
|
|
89
|
+
READ1(pca->balanced_bins);
|
|
90
|
+
READVECTOR(pca->mean);
|
|
91
|
+
READVECTOR(pca->eigenvalues);
|
|
92
|
+
READVECTOR(pca->PCAMat);
|
|
92
93
|
lt = pca;
|
|
93
|
-
} else if (h == fourcc
|
|
94
|
-
ITQMatrix
|
|
95
|
-
READ1
|
|
96
|
-
READ1
|
|
94
|
+
} else if (h == fourcc("Viqm")) {
|
|
95
|
+
ITQMatrix* itqm = new ITQMatrix();
|
|
96
|
+
READ1(itqm->max_iter);
|
|
97
|
+
READ1(itqm->seed);
|
|
97
98
|
lt = itqm;
|
|
98
|
-
} else if (h == fourcc
|
|
99
|
-
lt = new LinearTransform
|
|
99
|
+
} else if (h == fourcc("LTra")) {
|
|
100
|
+
lt = new LinearTransform();
|
|
100
101
|
}
|
|
101
|
-
READ1
|
|
102
|
-
READVECTOR
|
|
103
|
-
READVECTOR
|
|
104
|
-
FAISS_THROW_IF_NOT
|
|
105
|
-
FAISS_THROW_IF_NOT
|
|
102
|
+
READ1(lt->have_bias);
|
|
103
|
+
READVECTOR(lt->A);
|
|
104
|
+
READVECTOR(lt->b);
|
|
105
|
+
FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out);
|
|
106
|
+
FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out);
|
|
106
107
|
lt->set_is_orthonormal();
|
|
107
108
|
vt = lt;
|
|
108
|
-
} else if (h == fourcc
|
|
109
|
-
RemapDimensionsTransform
|
|
110
|
-
READVECTOR
|
|
109
|
+
} else if (h == fourcc("RmDT")) {
|
|
110
|
+
RemapDimensionsTransform* rdt = new RemapDimensionsTransform();
|
|
111
|
+
READVECTOR(rdt->map);
|
|
111
112
|
vt = rdt;
|
|
112
|
-
} else if (h == fourcc
|
|
113
|
-
NormalizationTransform
|
|
114
|
-
READ1
|
|
113
|
+
} else if (h == fourcc("VNrm")) {
|
|
114
|
+
NormalizationTransform* nt = new NormalizationTransform();
|
|
115
|
+
READ1(nt->norm);
|
|
115
116
|
vt = nt;
|
|
116
|
-
} else if (h == fourcc
|
|
117
|
-
CenteringTransform
|
|
118
|
-
READVECTOR
|
|
117
|
+
} else if (h == fourcc("VCnt")) {
|
|
118
|
+
CenteringTransform* ct = new CenteringTransform();
|
|
119
|
+
READVECTOR(ct->mean);
|
|
119
120
|
vt = ct;
|
|
120
|
-
} else if (h == fourcc
|
|
121
|
-
ITQTransform
|
|
121
|
+
} else if (h == fourcc("Viqt")) {
|
|
122
|
+
ITQTransform* itqt = new ITQTransform();
|
|
122
123
|
|
|
123
|
-
READVECTOR
|
|
124
|
-
READ1
|
|
124
|
+
READVECTOR(itqt->mean);
|
|
125
|
+
READ1(itqt->do_pca);
|
|
125
126
|
{
|
|
126
|
-
ITQMatrix
|
|
127
|
-
(read_VectorTransform (f));
|
|
127
|
+
ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f));
|
|
128
128
|
FAISS_THROW_IF_NOT(itqm);
|
|
129
129
|
itqt->itq = *itqm;
|
|
130
130
|
delete itqm;
|
|
131
131
|
}
|
|
132
132
|
{
|
|
133
|
-
LinearTransform
|
|
134
|
-
|
|
135
|
-
FAISS_THROW_IF_NOT
|
|
133
|
+
LinearTransform* pi =
|
|
134
|
+
dynamic_cast<LinearTransform*>(read_VectorTransform(f));
|
|
135
|
+
FAISS_THROW_IF_NOT(pi);
|
|
136
136
|
itqt->pca_then_itq = *pi;
|
|
137
137
|
delete pi;
|
|
138
138
|
}
|
|
139
139
|
vt = itqt;
|
|
140
140
|
} else {
|
|
141
141
|
FAISS_THROW_FMT(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
142
|
+
"fourcc %ud (\"%s\") not recognized",
|
|
143
|
+
h,
|
|
144
|
+
fourcc_inv_printable(h).c_str());
|
|
145
145
|
}
|
|
146
|
-
READ1
|
|
147
|
-
READ1
|
|
148
|
-
READ1
|
|
146
|
+
READ1(vt->d_in);
|
|
147
|
+
READ1(vt->d_out);
|
|
148
|
+
READ1(vt->is_trained);
|
|
149
149
|
return vt;
|
|
150
150
|
}
|
|
151
151
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
{
|
|
152
|
+
static void read_ArrayInvertedLists_sizes(
|
|
153
|
+
IOReader* f,
|
|
154
|
+
std::vector<size_t>& sizes) {
|
|
156
155
|
uint32_t list_type;
|
|
157
156
|
READ1(list_type);
|
|
158
157
|
if (list_type == fourcc("full")) {
|
|
159
158
|
size_t os = sizes.size();
|
|
160
|
-
READVECTOR
|
|
161
|
-
FAISS_THROW_IF_NOT
|
|
159
|
+
READVECTOR(sizes);
|
|
160
|
+
FAISS_THROW_IF_NOT(os == sizes.size());
|
|
162
161
|
} else if (list_type == fourcc("sprs")) {
|
|
163
162
|
std::vector<size_t> idsizes;
|
|
164
|
-
READVECTOR
|
|
163
|
+
READVECTOR(idsizes);
|
|
165
164
|
for (size_t j = 0; j < idsizes.size(); j += 2) {
|
|
166
|
-
FAISS_THROW_IF_NOT
|
|
165
|
+
FAISS_THROW_IF_NOT(idsizes[j] < sizes.size());
|
|
167
166
|
sizes[idsizes[j]] = idsizes[j + 1];
|
|
168
167
|
}
|
|
169
168
|
} else {
|
|
170
169
|
FAISS_THROW_FMT(
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
170
|
+
"list_type %ud (\"%s\") not recognized",
|
|
171
|
+
list_type,
|
|
172
|
+
fourcc_inv_printable(list_type).c_str());
|
|
174
173
|
}
|
|
175
174
|
}
|
|
176
175
|
|
|
177
|
-
InvertedLists
|
|
176
|
+
InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
|
|
178
177
|
uint32_t h;
|
|
179
|
-
READ1
|
|
180
|
-
if (h == fourcc
|
|
181
|
-
fprintf(stderr,
|
|
178
|
+
READ1(h);
|
|
179
|
+
if (h == fourcc("il00")) {
|
|
180
|
+
fprintf(stderr,
|
|
181
|
+
"read_InvertedLists:"
|
|
182
182
|
" WARN! inverted lists not stored with IVF object\n");
|
|
183
183
|
return nullptr;
|
|
184
|
-
} else if (h == fourcc
|
|
185
|
-
auto ails = new ArrayInvertedLists
|
|
186
|
-
READ1
|
|
187
|
-
READ1
|
|
188
|
-
ails->ids.resize
|
|
189
|
-
ails->codes.resize
|
|
190
|
-
std::vector<size_t> sizes
|
|
191
|
-
read_ArrayInvertedLists_sizes
|
|
184
|
+
} else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
185
|
+
auto ails = new ArrayInvertedLists(0, 0);
|
|
186
|
+
READ1(ails->nlist);
|
|
187
|
+
READ1(ails->code_size);
|
|
188
|
+
ails->ids.resize(ails->nlist);
|
|
189
|
+
ails->codes.resize(ails->nlist);
|
|
190
|
+
std::vector<size_t> sizes(ails->nlist);
|
|
191
|
+
read_ArrayInvertedLists_sizes(f, sizes);
|
|
192
192
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
193
|
-
ails->ids[i].resize
|
|
194
|
-
ails->codes[i].resize
|
|
193
|
+
ails->ids[i].resize(sizes[i]);
|
|
194
|
+
ails->codes[i].resize(sizes[i] * ails->code_size);
|
|
195
195
|
}
|
|
196
196
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
197
197
|
size_t n = ails->ids[i].size();
|
|
198
198
|
if (n > 0) {
|
|
199
|
-
READANDCHECK
|
|
200
|
-
READANDCHECK
|
|
199
|
+
READANDCHECK(ails->codes[i].data(), n * ails->code_size);
|
|
200
|
+
READANDCHECK(ails->ids[i].data(), n);
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
203
|
return ails;
|
|
204
204
|
|
|
205
|
-
} else if (h == fourcc
|
|
206
|
-
// code is always ilxx where xx is specific to the type of invlists we
|
|
207
|
-
// so we get the 16 high bits from the io_flag and the 16 low bits
|
|
205
|
+
} else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
206
|
+
// code is always ilxx where xx is specific to the type of invlists we
|
|
207
|
+
// want so we get the 16 high bits from the io_flag and the 16 low bits
|
|
208
|
+
// as "il"
|
|
208
209
|
int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
|
|
209
210
|
size_t nlist, code_size;
|
|
210
|
-
READ1
|
|
211
|
-
READ1
|
|
212
|
-
std::vector<size_t> sizes
|
|
213
|
-
read_ArrayInvertedLists_sizes
|
|
211
|
+
READ1(nlist);
|
|
212
|
+
READ1(code_size);
|
|
213
|
+
std::vector<size_t> sizes(nlist);
|
|
214
|
+
read_ArrayInvertedLists_sizes(f, sizes);
|
|
214
215
|
return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
|
|
215
216
|
f, io_flags, nlist, code_size, sizes);
|
|
216
217
|
} else {
|
|
217
218
|
return InvertedListsIOHook::lookup(h)->read(f, io_flags);
|
|
218
219
|
}
|
|
219
|
-
|
|
220
220
|
}
|
|
221
221
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
IndexIVF *ivf, IOReader *f, int io_flags) {
|
|
225
|
-
InvertedLists *ils = read_InvertedLists (f, io_flags);
|
|
222
|
+
static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
|
|
223
|
+
InvertedLists* ils = read_InvertedLists(f, io_flags);
|
|
226
224
|
if (ils) {
|
|
227
|
-
FAISS_THROW_IF_NOT
|
|
228
|
-
FAISS_THROW_IF_NOT
|
|
229
|
-
|
|
225
|
+
FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
|
|
226
|
+
FAISS_THROW_IF_NOT(
|
|
227
|
+
ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
|
|
228
|
+
ils->code_size == ivf->code_size);
|
|
230
229
|
}
|
|
231
230
|
ivf->invlists = ils;
|
|
232
231
|
ivf->own_invlists = true;
|
|
233
232
|
}
|
|
234
233
|
|
|
235
|
-
static void read_ProductQuantizer
|
|
236
|
-
READ1
|
|
237
|
-
READ1
|
|
238
|
-
READ1
|
|
239
|
-
pq->set_derived_values
|
|
240
|
-
READVECTOR
|
|
234
|
+
static void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
|
|
235
|
+
READ1(pq->d);
|
|
236
|
+
READ1(pq->M);
|
|
237
|
+
READ1(pq->nbits);
|
|
238
|
+
pq->set_derived_values();
|
|
239
|
+
READVECTOR(pq->centroids);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
static void read_ResidualQuantizer(ResidualQuantizer* rq, IOReader* f) {
|
|
243
|
+
READ1(rq->d);
|
|
244
|
+
READ1(rq->M);
|
|
245
|
+
READVECTOR(rq->nbits);
|
|
246
|
+
rq->set_derived_values();
|
|
247
|
+
READ1(rq->is_trained);
|
|
248
|
+
READ1(rq->train_type);
|
|
249
|
+
READ1(rq->max_beam_size);
|
|
250
|
+
READVECTOR(rq->codebooks);
|
|
241
251
|
}
|
|
242
252
|
|
|
243
|
-
static void read_ScalarQuantizer
|
|
244
|
-
READ1
|
|
245
|
-
READ1
|
|
246
|
-
READ1
|
|
247
|
-
READ1
|
|
248
|
-
READ1
|
|
249
|
-
READVECTOR
|
|
250
|
-
ivsc->set_derived_sizes
|
|
253
|
+
static void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
|
|
254
|
+
READ1(ivsc->qtype);
|
|
255
|
+
READ1(ivsc->rangestat);
|
|
256
|
+
READ1(ivsc->rangestat_arg);
|
|
257
|
+
READ1(ivsc->d);
|
|
258
|
+
READ1(ivsc->code_size);
|
|
259
|
+
READVECTOR(ivsc->trained);
|
|
260
|
+
ivsc->set_derived_sizes();
|
|
251
261
|
}
|
|
252
262
|
|
|
263
|
+
static void read_HNSW(HNSW* hnsw, IOReader* f) {
|
|
264
|
+
READVECTOR(hnsw->assign_probas);
|
|
265
|
+
READVECTOR(hnsw->cum_nneighbor_per_level);
|
|
266
|
+
READVECTOR(hnsw->levels);
|
|
267
|
+
READVECTOR(hnsw->offsets);
|
|
268
|
+
READVECTOR(hnsw->neighbors);
|
|
269
|
+
|
|
270
|
+
READ1(hnsw->entry_point);
|
|
271
|
+
READ1(hnsw->max_level);
|
|
272
|
+
READ1(hnsw->efConstruction);
|
|
273
|
+
READ1(hnsw->efSearch);
|
|
274
|
+
READ1(hnsw->upper_beam);
|
|
275
|
+
}
|
|
253
276
|
|
|
254
|
-
static void
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
277
|
+
static void read_NSG(NSG* nsg, IOReader* f) {
|
|
278
|
+
READ1(nsg->ntotal);
|
|
279
|
+
READ1(nsg->R);
|
|
280
|
+
READ1(nsg->L);
|
|
281
|
+
READ1(nsg->C);
|
|
282
|
+
READ1(nsg->search_L);
|
|
283
|
+
READ1(nsg->enterpoint);
|
|
284
|
+
READ1(nsg->is_built);
|
|
285
|
+
|
|
286
|
+
if (!nsg->is_built) {
|
|
287
|
+
return;
|
|
288
|
+
}
|
|
260
289
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
290
|
+
constexpr int EMPTY_ID = -1;
|
|
291
|
+
int N = nsg->ntotal;
|
|
292
|
+
int R = nsg->R;
|
|
293
|
+
auto& graph = nsg->final_graph;
|
|
294
|
+
graph = std::make_shared<nsg::Graph<int>>(N, R);
|
|
295
|
+
std::fill_n(graph->data, N * R, EMPTY_ID);
|
|
296
|
+
|
|
297
|
+
int size = 0;
|
|
298
|
+
|
|
299
|
+
for (int i = 0; i < N; i++) {
|
|
300
|
+
for (int j = 0; j < R + 1; j++) {
|
|
301
|
+
int id;
|
|
302
|
+
READ1(id);
|
|
303
|
+
if (id != EMPTY_ID) {
|
|
304
|
+
graph->at(i, j) = id;
|
|
305
|
+
size += 1;
|
|
306
|
+
} else {
|
|
307
|
+
break;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
266
311
|
}
|
|
267
312
|
|
|
268
|
-
ProductQuantizer
|
|
313
|
+
ProductQuantizer* read_ProductQuantizer(const char* fname) {
|
|
269
314
|
FileIOReader reader(fname);
|
|
270
315
|
return read_ProductQuantizer(&reader);
|
|
271
316
|
}
|
|
272
317
|
|
|
273
|
-
ProductQuantizer
|
|
274
|
-
|
|
275
|
-
|
|
318
|
+
ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
|
|
319
|
+
ProductQuantizer* pq = new ProductQuantizer();
|
|
320
|
+
ScopeDeleter1<ProductQuantizer> del(pq);
|
|
276
321
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
322
|
+
read_ProductQuantizer(pq, reader);
|
|
323
|
+
del.release();
|
|
324
|
+
return pq;
|
|
280
325
|
}
|
|
281
326
|
|
|
282
|
-
static void read_direct_map
|
|
327
|
+
static void read_direct_map(DirectMap* dm, IOReader* f) {
|
|
283
328
|
char maintain_direct_map;
|
|
284
|
-
READ1
|
|
329
|
+
READ1(maintain_direct_map);
|
|
285
330
|
dm->type = (DirectMap::Type)maintain_direct_map;
|
|
286
|
-
READVECTOR
|
|
331
|
+
READVECTOR(dm->array);
|
|
287
332
|
if (dm->type == DirectMap::Hashtable) {
|
|
288
333
|
using idx_t = Index::idx_t;
|
|
289
334
|
std::vector<std::pair<idx_t, idx_t>> v;
|
|
290
|
-
READVECTOR
|
|
291
|
-
std::unordered_map<idx_t, idx_t
|
|
292
|
-
map.reserve
|
|
293
|
-
for (auto it: v) {
|
|
294
|
-
map
|
|
335
|
+
READVECTOR(v);
|
|
336
|
+
std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
|
|
337
|
+
map.reserve(v.size());
|
|
338
|
+
for (auto it : v) {
|
|
339
|
+
map[it.first] = it.second;
|
|
295
340
|
}
|
|
296
341
|
}
|
|
297
|
-
|
|
298
342
|
}
|
|
299
343
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
READ1
|
|
307
|
-
|
|
308
|
-
ivf->quantizer = read_index (f);
|
|
344
|
+
static void read_ivf_header(
|
|
345
|
+
IndexIVF* ivf,
|
|
346
|
+
IOReader* f,
|
|
347
|
+
std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
|
|
348
|
+
read_index_header(ivf, f);
|
|
349
|
+
READ1(ivf->nlist);
|
|
350
|
+
READ1(ivf->nprobe);
|
|
351
|
+
ivf->quantizer = read_index(f);
|
|
309
352
|
ivf->own_fields = true;
|
|
310
353
|
if (ids) { // used in legacy "Iv" formats
|
|
311
|
-
ids->resize
|
|
354
|
+
ids->resize(ivf->nlist);
|
|
312
355
|
for (size_t i = 0; i < ivf->nlist; i++)
|
|
313
|
-
READVECTOR
|
|
356
|
+
READVECTOR((*ids)[i]);
|
|
314
357
|
}
|
|
315
|
-
read_direct_map
|
|
358
|
+
read_direct_map(&ivf->direct_map, f);
|
|
316
359
|
}
|
|
317
360
|
|
|
318
361
|
// used for legacy formats
|
|
319
|
-
static ArrayInvertedLists
|
|
320
|
-
|
|
321
|
-
{
|
|
322
|
-
ArrayInvertedLists
|
|
323
|
-
|
|
324
|
-
std::swap
|
|
362
|
+
static ArrayInvertedLists* set_array_invlist(
|
|
363
|
+
IndexIVF* ivf,
|
|
364
|
+
std::vector<std::vector<Index::idx_t>>& ids) {
|
|
365
|
+
ArrayInvertedLists* ail =
|
|
366
|
+
new ArrayInvertedLists(ivf->nlist, ivf->code_size);
|
|
367
|
+
std::swap(ail->ids, ids);
|
|
325
368
|
ivf->invlists = ail;
|
|
326
369
|
ivf->own_invlists = true;
|
|
327
370
|
return ail;
|
|
328
371
|
}
|
|
329
372
|
|
|
330
|
-
static IndexIVFPQ
|
|
331
|
-
|
|
332
|
-
bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
|
|
373
|
+
static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
|
|
374
|
+
bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ");
|
|
333
375
|
|
|
334
|
-
IndexIVFPQR
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
IndexIVFPQ
|
|
376
|
+
IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR")
|
|
377
|
+
? new IndexIVFPQR()
|
|
378
|
+
: nullptr;
|
|
379
|
+
IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
|
|
338
380
|
|
|
339
|
-
std::vector<std::vector<Index::idx_t
|
|
340
|
-
read_ivf_header
|
|
341
|
-
READ1
|
|
342
|
-
READ1
|
|
343
|
-
read_ProductQuantizer
|
|
381
|
+
std::vector<std::vector<Index::idx_t>> ids;
|
|
382
|
+
read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
|
|
383
|
+
READ1(ivpq->by_residual);
|
|
384
|
+
READ1(ivpq->code_size);
|
|
385
|
+
read_ProductQuantizer(&ivpq->pq, f);
|
|
344
386
|
|
|
345
387
|
if (legacy) {
|
|
346
|
-
ArrayInvertedLists
|
|
388
|
+
ArrayInvertedLists* ail = set_array_invlist(ivpq, ids);
|
|
347
389
|
for (size_t i = 0; i < ail->nlist; i++)
|
|
348
|
-
READVECTOR
|
|
390
|
+
READVECTOR(ail->codes[i]);
|
|
349
391
|
} else {
|
|
350
|
-
read_InvertedLists
|
|
392
|
+
read_InvertedLists(ivpq, f, io_flags);
|
|
351
393
|
}
|
|
352
394
|
|
|
353
395
|
if (ivpq->is_trained) {
|
|
354
396
|
// precomputed table not stored. It is cheaper to recompute it
|
|
355
397
|
ivpq->use_precomputed_table = 0;
|
|
356
398
|
if (ivpq->by_residual)
|
|
357
|
-
ivpq->precompute_table
|
|
399
|
+
ivpq->precompute_table();
|
|
358
400
|
if (ivfpqr) {
|
|
359
|
-
read_ProductQuantizer
|
|
360
|
-
READVECTOR
|
|
361
|
-
READ1
|
|
401
|
+
read_ProductQuantizer(&ivfpqr->refine_pq, f);
|
|
402
|
+
READVECTOR(ivfpqr->refine_codes);
|
|
403
|
+
READ1(ivfpqr->k_factor);
|
|
362
404
|
}
|
|
363
405
|
}
|
|
364
406
|
return ivpq;
|
|
@@ -366,200 +408,216 @@ static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
|
|
|
366
408
|
|
|
367
409
|
int read_old_fmt_hack = 0;
|
|
368
410
|
|
|
369
|
-
Index
|
|
370
|
-
Index
|
|
411
|
+
Index* read_index(IOReader* f, int io_flags) {
|
|
412
|
+
Index* idx = nullptr;
|
|
371
413
|
uint32_t h;
|
|
372
|
-
READ1
|
|
373
|
-
if (h == fourcc
|
|
374
|
-
IndexFlat
|
|
375
|
-
if (h == fourcc
|
|
376
|
-
idxf = new IndexFlatIP
|
|
414
|
+
READ1(h);
|
|
415
|
+
if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
|
|
416
|
+
IndexFlat* idxf;
|
|
417
|
+
if (h == fourcc("IxFI")) {
|
|
418
|
+
idxf = new IndexFlatIP();
|
|
377
419
|
} else if (h == fourcc("IxF2")) {
|
|
378
|
-
idxf = new IndexFlatL2
|
|
420
|
+
idxf = new IndexFlatL2();
|
|
379
421
|
} else {
|
|
380
|
-
idxf = new IndexFlat
|
|
422
|
+
idxf = new IndexFlat();
|
|
381
423
|
}
|
|
382
|
-
read_index_header
|
|
383
|
-
READVECTOR
|
|
384
|
-
FAISS_THROW_IF_NOT
|
|
424
|
+
read_index_header(idxf, f);
|
|
425
|
+
READVECTOR(idxf->xb);
|
|
426
|
+
FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->d);
|
|
385
427
|
// leak!
|
|
386
428
|
idx = idxf;
|
|
387
429
|
} else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
|
|
388
|
-
IndexLSH
|
|
389
|
-
read_index_header
|
|
390
|
-
READ1
|
|
391
|
-
READ1
|
|
392
|
-
READ1
|
|
393
|
-
READVECTOR
|
|
394
|
-
READ1
|
|
430
|
+
IndexLSH* idxl = new IndexLSH();
|
|
431
|
+
read_index_header(idxl, f);
|
|
432
|
+
READ1(idxl->nbits);
|
|
433
|
+
READ1(idxl->rotate_data);
|
|
434
|
+
READ1(idxl->train_thresholds);
|
|
435
|
+
READVECTOR(idxl->thresholds);
|
|
436
|
+
READ1(idxl->bytes_per_vec);
|
|
395
437
|
if (h == fourcc("IxHE")) {
|
|
396
|
-
FAISS_THROW_IF_NOT_FMT
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
438
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
439
|
+
idxl->nbits % 64 == 0,
|
|
440
|
+
"can only read old format IndexLSH with "
|
|
441
|
+
"nbits multiple of 64 (got %d)",
|
|
442
|
+
(int)idxl->nbits);
|
|
400
443
|
// leak
|
|
401
444
|
idxl->bytes_per_vec *= 8;
|
|
402
445
|
}
|
|
403
446
|
{
|
|
404
|
-
RandomRotationMatrix
|
|
405
|
-
|
|
447
|
+
RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>(
|
|
448
|
+
read_VectorTransform(f));
|
|
406
449
|
FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
|
|
407
450
|
idxl->rrot = *rrot;
|
|
408
451
|
delete rrot;
|
|
409
452
|
}
|
|
410
|
-
READVECTOR
|
|
411
|
-
FAISS_THROW_IF_NOT
|
|
412
|
-
|
|
413
|
-
FAISS_THROW_IF_NOT
|
|
414
|
-
|
|
453
|
+
READVECTOR(idxl->codes);
|
|
454
|
+
FAISS_THROW_IF_NOT(
|
|
455
|
+
idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
|
|
456
|
+
FAISS_THROW_IF_NOT(
|
|
457
|
+
idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
|
|
415
458
|
idx = idxl;
|
|
416
|
-
} else if (
|
|
417
|
-
|
|
459
|
+
} else if (
|
|
460
|
+
h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) {
|
|
418
461
|
// IxPQ and IxPo were merged into the same IndexPQ object
|
|
419
|
-
IndexPQ
|
|
420
|
-
read_index_header
|
|
421
|
-
read_ProductQuantizer
|
|
422
|
-
READVECTOR
|
|
423
|
-
if (h == fourcc
|
|
424
|
-
READ1
|
|
425
|
-
READ1
|
|
426
|
-
READ1
|
|
462
|
+
IndexPQ* idxp = new IndexPQ();
|
|
463
|
+
read_index_header(idxp, f);
|
|
464
|
+
read_ProductQuantizer(&idxp->pq, f);
|
|
465
|
+
READVECTOR(idxp->codes);
|
|
466
|
+
if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
|
|
467
|
+
READ1(idxp->search_type);
|
|
468
|
+
READ1(idxp->encode_signs);
|
|
469
|
+
READ1(idxp->polysemous_ht);
|
|
427
470
|
}
|
|
428
471
|
// Old versions of PQ all had metric_type set to INNER_PRODUCT
|
|
429
472
|
// when they were in fact using L2. Therefore, we force metric type
|
|
430
473
|
// to L2 when the old format is detected
|
|
431
|
-
if (h == fourcc
|
|
474
|
+
if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
|
|
432
475
|
idxp->metric_type = METRIC_L2;
|
|
433
476
|
}
|
|
434
477
|
idx = idxp;
|
|
435
|
-
} else if (h == fourcc
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
478
|
+
} else if (h == fourcc("IxRQ")) {
|
|
479
|
+
IndexResidual* idxr = new IndexResidual();
|
|
480
|
+
read_index_header(idxr, f);
|
|
481
|
+
read_ResidualQuantizer(&idxr->rq, f);
|
|
482
|
+
READ1(idxr->search_type);
|
|
483
|
+
READ1(idxr->norm_min);
|
|
484
|
+
READ1(idxr->norm_max);
|
|
485
|
+
READ1(idxr->code_size);
|
|
486
|
+
READVECTOR(idxr->codes);
|
|
487
|
+
idx = idxr;
|
|
488
|
+
} else if (h == fourcc("ImRQ")) {
|
|
489
|
+
ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
|
|
490
|
+
read_index_header(idxr, f);
|
|
491
|
+
read_ResidualQuantizer(&idxr->rq, f);
|
|
492
|
+
READ1(idxr->beam_factor);
|
|
493
|
+
idxr->set_beam_factor(idxr->beam_factor);
|
|
494
|
+
idx = idxr;
|
|
495
|
+
} else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
|
|
496
|
+
IndexIVFFlat* ivfl = new IndexIVFFlat();
|
|
497
|
+
std::vector<std::vector<Index::idx_t>> ids;
|
|
498
|
+
read_ivf_header(ivfl, f, &ids);
|
|
439
499
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
440
|
-
ArrayInvertedLists
|
|
500
|
+
ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
|
|
441
501
|
|
|
442
|
-
if (h == fourcc
|
|
502
|
+
if (h == fourcc("IvFL")) {
|
|
443
503
|
for (size_t i = 0; i < ivfl->nlist; i++) {
|
|
444
|
-
READVECTOR
|
|
504
|
+
READVECTOR(ail->codes[i]);
|
|
445
505
|
}
|
|
446
506
|
} else { // old format
|
|
447
507
|
for (size_t i = 0; i < ivfl->nlist; i++) {
|
|
448
508
|
std::vector<float> vec;
|
|
449
|
-
READVECTOR
|
|
509
|
+
READVECTOR(vec);
|
|
450
510
|
ail->codes[i].resize(vec.size() * sizeof(float));
|
|
451
|
-
memcpy(ail->codes[i].data(), vec.data(),
|
|
452
|
-
ail->codes[i].size());
|
|
511
|
+
memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size());
|
|
453
512
|
}
|
|
454
513
|
}
|
|
455
514
|
idx = ivfl;
|
|
456
|
-
} else if (h == fourcc
|
|
457
|
-
IndexIVFFlatDedup
|
|
458
|
-
read_ivf_header
|
|
515
|
+
} else if (h == fourcc("IwFd")) {
|
|
516
|
+
IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup();
|
|
517
|
+
read_ivf_header(ivfl, f);
|
|
459
518
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
460
519
|
{
|
|
461
520
|
std::vector<Index::idx_t> tab;
|
|
462
|
-
READVECTOR
|
|
521
|
+
READVECTOR(tab);
|
|
463
522
|
for (long i = 0; i < tab.size(); i += 2) {
|
|
464
|
-
std::pair<Index::idx_t, Index::idx_t>
|
|
465
|
-
|
|
466
|
-
ivfl->instances.insert (pair);
|
|
523
|
+
std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
|
|
524
|
+
ivfl->instances.insert(pair);
|
|
467
525
|
}
|
|
468
526
|
}
|
|
469
|
-
read_InvertedLists
|
|
527
|
+
read_InvertedLists(ivfl, f, io_flags);
|
|
470
528
|
idx = ivfl;
|
|
471
|
-
} else if (h == fourcc
|
|
472
|
-
IndexIVFFlat
|
|
473
|
-
read_ivf_header
|
|
529
|
+
} else if (h == fourcc("IwFl")) {
|
|
530
|
+
IndexIVFFlat* ivfl = new IndexIVFFlat();
|
|
531
|
+
read_ivf_header(ivfl, f);
|
|
474
532
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
475
|
-
read_InvertedLists
|
|
533
|
+
read_InvertedLists(ivfl, f, io_flags);
|
|
476
534
|
idx = ivfl;
|
|
477
|
-
} else if (h == fourcc
|
|
478
|
-
IndexScalarQuantizer
|
|
479
|
-
read_index_header
|
|
480
|
-
read_ScalarQuantizer
|
|
481
|
-
READVECTOR
|
|
535
|
+
} else if (h == fourcc("IxSQ")) {
|
|
536
|
+
IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
|
|
537
|
+
read_index_header(idxs, f);
|
|
538
|
+
read_ScalarQuantizer(&idxs->sq, f);
|
|
539
|
+
READVECTOR(idxs->codes);
|
|
482
540
|
idxs->code_size = idxs->sq.code_size;
|
|
483
541
|
idx = idxs;
|
|
484
|
-
} else if (h == fourcc
|
|
542
|
+
} else if (h == fourcc("IxLa")) {
|
|
485
543
|
int d, nsq, scale_nbit, r2;
|
|
486
|
-
READ1
|
|
487
|
-
READ1
|
|
488
|
-
READ1
|
|
489
|
-
READ1
|
|
490
|
-
IndexLattice
|
|
491
|
-
read_index_header
|
|
492
|
-
READVECTOR
|
|
544
|
+
READ1(d);
|
|
545
|
+
READ1(nsq);
|
|
546
|
+
READ1(scale_nbit);
|
|
547
|
+
READ1(r2);
|
|
548
|
+
IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2);
|
|
549
|
+
read_index_header(idxl, f);
|
|
550
|
+
READVECTOR(idxl->trained);
|
|
493
551
|
idx = idxl;
|
|
494
|
-
} else if(h == fourcc
|
|
495
|
-
IndexIVFScalarQuantizer
|
|
496
|
-
std::vector<std::vector<Index::idx_t
|
|
497
|
-
read_ivf_header
|
|
498
|
-
read_ScalarQuantizer
|
|
499
|
-
READ1
|
|
500
|
-
ArrayInvertedLists
|
|
501
|
-
for(int i = 0; i < ivsc->nlist; i++)
|
|
502
|
-
READVECTOR
|
|
552
|
+
} else if (h == fourcc("IvSQ")) { // legacy
|
|
553
|
+
IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
|
|
554
|
+
std::vector<std::vector<Index::idx_t>> ids;
|
|
555
|
+
read_ivf_header(ivsc, f, &ids);
|
|
556
|
+
read_ScalarQuantizer(&ivsc->sq, f);
|
|
557
|
+
READ1(ivsc->code_size);
|
|
558
|
+
ArrayInvertedLists* ail = set_array_invlist(ivsc, ids);
|
|
559
|
+
for (int i = 0; i < ivsc->nlist; i++)
|
|
560
|
+
READVECTOR(ail->codes[i]);
|
|
503
561
|
idx = ivsc;
|
|
504
|
-
} else if(h == fourcc
|
|
505
|
-
IndexIVFScalarQuantizer
|
|
506
|
-
read_ivf_header
|
|
507
|
-
read_ScalarQuantizer
|
|
508
|
-
READ1
|
|
509
|
-
if (h == fourcc
|
|
562
|
+
} else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) {
|
|
563
|
+
IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
|
|
564
|
+
read_ivf_header(ivsc, f);
|
|
565
|
+
read_ScalarQuantizer(&ivsc->sq, f);
|
|
566
|
+
READ1(ivsc->code_size);
|
|
567
|
+
if (h == fourcc("IwSQ")) {
|
|
510
568
|
ivsc->by_residual = true;
|
|
511
569
|
} else {
|
|
512
|
-
READ1
|
|
570
|
+
READ1(ivsc->by_residual);
|
|
513
571
|
}
|
|
514
|
-
read_InvertedLists
|
|
572
|
+
read_InvertedLists(ivsc, f, io_flags);
|
|
515
573
|
idx = ivsc;
|
|
516
|
-
} else if(h == fourcc
|
|
517
|
-
IndexIVFSpectralHash
|
|
518
|
-
read_ivf_header
|
|
519
|
-
ivsp->vt = read_VectorTransform
|
|
574
|
+
} else if (h == fourcc("IwSh")) {
|
|
575
|
+
IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash();
|
|
576
|
+
read_ivf_header(ivsp, f);
|
|
577
|
+
ivsp->vt = read_VectorTransform(f);
|
|
520
578
|
ivsp->own_fields = true;
|
|
521
|
-
READ1
|
|
579
|
+
READ1(ivsp->nbit);
|
|
522
580
|
// not stored by write_ivf_header
|
|
523
581
|
ivsp->code_size = (ivsp->nbit + 7) / 8;
|
|
524
|
-
READ1
|
|
525
|
-
READ1
|
|
526
|
-
READVECTOR
|
|
527
|
-
read_InvertedLists
|
|
582
|
+
READ1(ivsp->period);
|
|
583
|
+
READ1(ivsp->threshold_type);
|
|
584
|
+
READVECTOR(ivsp->trained);
|
|
585
|
+
read_InvertedLists(ivsp, f, io_flags);
|
|
528
586
|
idx = ivsp;
|
|
529
|
-
} else if
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
idx = read_ivfpq
|
|
587
|
+
} else if (
|
|
588
|
+
h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
|
|
589
|
+
h == fourcc("IwQR")) {
|
|
590
|
+
idx = read_ivfpq(f, h, io_flags);
|
|
533
591
|
|
|
534
|
-
} else if(h == fourcc
|
|
535
|
-
IndexPreTransform
|
|
592
|
+
} else if (h == fourcc("IxPT")) {
|
|
593
|
+
IndexPreTransform* ixpt = new IndexPreTransform();
|
|
536
594
|
ixpt->own_fields = true;
|
|
537
|
-
read_index_header
|
|
595
|
+
read_index_header(ixpt, f);
|
|
538
596
|
int nt;
|
|
539
597
|
if (read_old_fmt_hack == 2) {
|
|
540
598
|
nt = 1;
|
|
541
599
|
} else {
|
|
542
|
-
READ1
|
|
600
|
+
READ1(nt);
|
|
543
601
|
}
|
|
544
602
|
for (int i = 0; i < nt; i++) {
|
|
545
|
-
ixpt->chain.push_back
|
|
603
|
+
ixpt->chain.push_back(read_VectorTransform(f));
|
|
546
604
|
}
|
|
547
|
-
ixpt->index = read_index
|
|
605
|
+
ixpt->index = read_index(f, io_flags);
|
|
548
606
|
idx = ixpt;
|
|
549
|
-
} else if(h == fourcc
|
|
550
|
-
MultiIndexQuantizer
|
|
551
|
-
read_index_header
|
|
552
|
-
read_ProductQuantizer
|
|
607
|
+
} else if (h == fourcc("Imiq")) {
|
|
608
|
+
MultiIndexQuantizer* imiq = new MultiIndexQuantizer();
|
|
609
|
+
read_index_header(imiq, f);
|
|
610
|
+
read_ProductQuantizer(&imiq->pq, f);
|
|
553
611
|
idx = imiq;
|
|
554
|
-
} else if(h == fourcc
|
|
555
|
-
IndexRefine
|
|
556
|
-
read_index_header
|
|
612
|
+
} else if (h == fourcc("IxRF")) {
|
|
613
|
+
IndexRefine* idxrf = new IndexRefine();
|
|
614
|
+
read_index_header(idxrf, f);
|
|
557
615
|
idxrf->base_index = read_index(f, io_flags);
|
|
558
616
|
idxrf->refine_index = read_index(f, io_flags);
|
|
559
|
-
READ1
|
|
617
|
+
READ1(idxrf->k_factor);
|
|
560
618
|
if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
|
|
561
619
|
// then make a RefineFlat with it
|
|
562
|
-
IndexRefine
|
|
620
|
+
IndexRefine* idxrf_old = idxrf;
|
|
563
621
|
idxrf = new IndexRefineFlat();
|
|
564
622
|
*idxrf = *idxrf_old;
|
|
565
623
|
delete idxrf_old;
|
|
@@ -567,248 +625,260 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
|
567
625
|
idxrf->own_fields = true;
|
|
568
626
|
idxrf->own_refine_index = true;
|
|
569
627
|
idx = idxrf;
|
|
570
|
-
} else if(h == fourcc
|
|
571
|
-
bool is_map2 = h == fourcc
|
|
572
|
-
IndexIDMap
|
|
573
|
-
read_index_header
|
|
574
|
-
idxmap->index = read_index
|
|
628
|
+
} else if (h == fourcc("IxMp") || h == fourcc("IxM2")) {
|
|
629
|
+
bool is_map2 = h == fourcc("IxM2");
|
|
630
|
+
IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap();
|
|
631
|
+
read_index_header(idxmap, f);
|
|
632
|
+
idxmap->index = read_index(f, io_flags);
|
|
575
633
|
idxmap->own_fields = true;
|
|
576
|
-
READVECTOR
|
|
634
|
+
READVECTOR(idxmap->id_map);
|
|
577
635
|
if (is_map2) {
|
|
578
|
-
static_cast<IndexIDMap2*>(idxmap)->construct_rev_map
|
|
636
|
+
static_cast<IndexIDMap2*>(idxmap)->construct_rev_map();
|
|
579
637
|
}
|
|
580
638
|
idx = idxmap;
|
|
581
|
-
} else if (h == fourcc
|
|
582
|
-
Index2Layer
|
|
583
|
-
read_index_header
|
|
584
|
-
idxp->q1.quantizer = read_index
|
|
585
|
-
READ1
|
|
586
|
-
READ1
|
|
587
|
-
read_ProductQuantizer
|
|
588
|
-
READ1
|
|
589
|
-
READ1
|
|
590
|
-
READ1
|
|
591
|
-
READVECTOR
|
|
639
|
+
} else if (h == fourcc("Ix2L")) {
|
|
640
|
+
Index2Layer* idxp = new Index2Layer();
|
|
641
|
+
read_index_header(idxp, f);
|
|
642
|
+
idxp->q1.quantizer = read_index(f, io_flags);
|
|
643
|
+
READ1(idxp->q1.nlist);
|
|
644
|
+
READ1(idxp->q1.quantizer_trains_alone);
|
|
645
|
+
read_ProductQuantizer(&idxp->pq, f);
|
|
646
|
+
READ1(idxp->code_size_1);
|
|
647
|
+
READ1(idxp->code_size_2);
|
|
648
|
+
READ1(idxp->code_size);
|
|
649
|
+
READVECTOR(idxp->codes);
|
|
592
650
|
idx = idxp;
|
|
593
|
-
} else if
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
if (h == fourcc("
|
|
598
|
-
|
|
599
|
-
if (h == fourcc("
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
651
|
+
} else if (
|
|
652
|
+
h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
|
|
653
|
+
h == fourcc("IHN2")) {
|
|
654
|
+
IndexHNSW* idxhnsw = nullptr;
|
|
655
|
+
if (h == fourcc("IHNf"))
|
|
656
|
+
idxhnsw = new IndexHNSWFlat();
|
|
657
|
+
if (h == fourcc("IHNp"))
|
|
658
|
+
idxhnsw = new IndexHNSWPQ();
|
|
659
|
+
if (h == fourcc("IHNs"))
|
|
660
|
+
idxhnsw = new IndexHNSWSQ();
|
|
661
|
+
if (h == fourcc("IHN2"))
|
|
662
|
+
idxhnsw = new IndexHNSW2Level();
|
|
663
|
+
read_index_header(idxhnsw, f);
|
|
664
|
+
read_HNSW(&idxhnsw->hnsw, f);
|
|
665
|
+
idxhnsw->storage = read_index(f, io_flags);
|
|
603
666
|
idxhnsw->own_fields = true;
|
|
604
667
|
if (h == fourcc("IHNp")) {
|
|
605
|
-
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table
|
|
668
|
+
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
|
|
606
669
|
}
|
|
607
670
|
idx = idxhnsw;
|
|
608
|
-
} else if(h == fourcc("
|
|
609
|
-
|
|
610
|
-
read_index_header
|
|
611
|
-
|
|
612
|
-
READ1
|
|
613
|
-
READ1
|
|
614
|
-
READ1
|
|
615
|
-
READ1
|
|
616
|
-
READ1
|
|
617
|
-
|
|
671
|
+
} else if (h == fourcc("INSf")) {
|
|
672
|
+
IndexNSG* idxnsg = new IndexNSGFlat();
|
|
673
|
+
read_index_header(idxnsg, f);
|
|
674
|
+
READ1(idxnsg->GK);
|
|
675
|
+
READ1(idxnsg->build_type);
|
|
676
|
+
READ1(idxnsg->nndescent_S);
|
|
677
|
+
READ1(idxnsg->nndescent_R);
|
|
678
|
+
READ1(idxnsg->nndescent_L);
|
|
679
|
+
READ1(idxnsg->nndescent_iter);
|
|
680
|
+
read_NSG(&idxnsg->nsg, f);
|
|
681
|
+
idxnsg->storage = read_index(f, io_flags);
|
|
682
|
+
idxnsg->own_fields = true;
|
|
683
|
+
idx = idxnsg;
|
|
684
|
+
} else if (h == fourcc("IPfs")) {
|
|
685
|
+
IndexPQFastScan* idxpqfs = new IndexPQFastScan();
|
|
686
|
+
read_index_header(idxpqfs, f);
|
|
687
|
+
read_ProductQuantizer(&idxpqfs->pq, f);
|
|
688
|
+
READ1(idxpqfs->implem);
|
|
689
|
+
READ1(idxpqfs->bbs);
|
|
690
|
+
READ1(idxpqfs->qbs);
|
|
691
|
+
READ1(idxpqfs->ntotal2);
|
|
692
|
+
READ1(idxpqfs->M2);
|
|
693
|
+
READVECTOR(idxpqfs->codes);
|
|
618
694
|
idx = idxpqfs;
|
|
619
695
|
|
|
620
696
|
} else if (h == fourcc("IwPf")) {
|
|
621
|
-
IndexIVFPQFastScan
|
|
622
|
-
read_ivf_header
|
|
623
|
-
READ1
|
|
624
|
-
READ1
|
|
625
|
-
READ1
|
|
626
|
-
READ1
|
|
627
|
-
READ1
|
|
628
|
-
READ1
|
|
629
|
-
read_ProductQuantizer
|
|
630
|
-
read_InvertedLists
|
|
697
|
+
IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan();
|
|
698
|
+
read_ivf_header(ivpq, f);
|
|
699
|
+
READ1(ivpq->by_residual);
|
|
700
|
+
READ1(ivpq->code_size);
|
|
701
|
+
READ1(ivpq->bbs);
|
|
702
|
+
READ1(ivpq->M2);
|
|
703
|
+
READ1(ivpq->implem);
|
|
704
|
+
READ1(ivpq->qbs2);
|
|
705
|
+
read_ProductQuantizer(&ivpq->pq, f);
|
|
706
|
+
read_InvertedLists(ivpq, f, io_flags);
|
|
631
707
|
ivpq->precompute_table();
|
|
632
708
|
idx = ivpq;
|
|
633
709
|
} else {
|
|
634
710
|
FAISS_THROW_FMT(
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
711
|
+
"Index type 0x%08x (\"%s\") not recognized",
|
|
712
|
+
h,
|
|
713
|
+
fourcc_inv_printable(h).c_str());
|
|
638
714
|
idx = nullptr;
|
|
639
715
|
}
|
|
640
716
|
return idx;
|
|
641
717
|
}
|
|
642
718
|
|
|
643
|
-
|
|
644
|
-
Index *read_index (FILE * f, int io_flags) {
|
|
719
|
+
Index* read_index(FILE* f, int io_flags) {
|
|
645
720
|
FileIOReader reader(f);
|
|
646
721
|
return read_index(&reader, io_flags);
|
|
647
722
|
}
|
|
648
723
|
|
|
649
|
-
Index
|
|
724
|
+
Index* read_index(const char* fname, int io_flags) {
|
|
650
725
|
FileIOReader reader(fname);
|
|
651
|
-
Index
|
|
726
|
+
Index* idx = read_index(&reader, io_flags);
|
|
652
727
|
return idx;
|
|
653
728
|
}
|
|
654
729
|
|
|
655
|
-
VectorTransform
|
|
730
|
+
VectorTransform* read_VectorTransform(const char* fname) {
|
|
656
731
|
FileIOReader reader(fname);
|
|
657
|
-
VectorTransform
|
|
732
|
+
VectorTransform* vt = read_VectorTransform(&reader);
|
|
658
733
|
return vt;
|
|
659
734
|
}
|
|
660
735
|
|
|
661
|
-
|
|
662
|
-
|
|
663
736
|
/*************************************************************
|
|
664
737
|
* Read binary indexes
|
|
665
738
|
**************************************************************/
|
|
666
739
|
|
|
667
|
-
static void read_InvertedLists
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
740
|
+
static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) {
|
|
741
|
+
InvertedLists* ils = read_InvertedLists(f, io_flags);
|
|
742
|
+
FAISS_THROW_IF_NOT(
|
|
743
|
+
!ils ||
|
|
744
|
+
(ils->nlist == ivf->nlist && ils->code_size == ivf->code_size));
|
|
672
745
|
ivf->invlists = ils;
|
|
673
746
|
ivf->own_invlists = true;
|
|
674
747
|
}
|
|
675
748
|
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
READ1
|
|
680
|
-
READ1
|
|
681
|
-
READ1
|
|
682
|
-
READ1 (idx->is_trained);
|
|
683
|
-
READ1 (idx->metric_type);
|
|
749
|
+
static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
|
|
750
|
+
READ1(idx->d);
|
|
751
|
+
READ1(idx->code_size);
|
|
752
|
+
READ1(idx->ntotal);
|
|
753
|
+
READ1(idx->is_trained);
|
|
754
|
+
READ1(idx->metric_type);
|
|
684
755
|
idx->verbose = false;
|
|
685
756
|
}
|
|
686
757
|
|
|
687
|
-
static void read_binary_ivf_header
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
{
|
|
691
|
-
read_index_binary_header
|
|
692
|
-
READ1
|
|
693
|
-
READ1
|
|
694
|
-
ivf->quantizer = read_index_binary
|
|
758
|
+
static void read_binary_ivf_header(
|
|
759
|
+
IndexBinaryIVF* ivf,
|
|
760
|
+
IOReader* f,
|
|
761
|
+
std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
|
|
762
|
+
read_index_binary_header(ivf, f);
|
|
763
|
+
READ1(ivf->nlist);
|
|
764
|
+
READ1(ivf->nprobe);
|
|
765
|
+
ivf->quantizer = read_index_binary(f);
|
|
695
766
|
ivf->own_fields = true;
|
|
696
767
|
if (ids) { // used in legacy "Iv" formats
|
|
697
|
-
ids->resize
|
|
768
|
+
ids->resize(ivf->nlist);
|
|
698
769
|
for (size_t i = 0; i < ivf->nlist; i++)
|
|
699
|
-
READVECTOR
|
|
770
|
+
READVECTOR((*ids)[i]);
|
|
700
771
|
}
|
|
701
|
-
read_direct_map
|
|
772
|
+
read_direct_map(&ivf->direct_map, f);
|
|
702
773
|
}
|
|
703
774
|
|
|
704
|
-
static void read_binary_hash_invlists
|
|
705
|
-
IndexBinaryHash::InvertedListMap
|
|
706
|
-
int b,
|
|
707
|
-
{
|
|
775
|
+
static void read_binary_hash_invlists(
|
|
776
|
+
IndexBinaryHash::InvertedListMap& invlists,
|
|
777
|
+
int b,
|
|
778
|
+
IOReader* f) {
|
|
708
779
|
size_t sz;
|
|
709
|
-
READ1
|
|
780
|
+
READ1(sz);
|
|
710
781
|
int il_nbit = 0;
|
|
711
|
-
READ1
|
|
782
|
+
READ1(il_nbit);
|
|
712
783
|
// buffer for bitstrings
|
|
713
784
|
std::vector<uint8_t> buf((b + il_nbit) * sz);
|
|
714
|
-
READVECTOR
|
|
715
|
-
BitstringReader rd
|
|
716
|
-
invlists.reserve
|
|
785
|
+
READVECTOR(buf);
|
|
786
|
+
BitstringReader rd(buf.data(), buf.size());
|
|
787
|
+
invlists.reserve(sz);
|
|
717
788
|
for (size_t i = 0; i < sz; i++) {
|
|
718
789
|
uint64_t hash = rd.read(b);
|
|
719
790
|
uint64_t ilsz = rd.read(il_nbit);
|
|
720
|
-
auto
|
|
721
|
-
READVECTOR
|
|
722
|
-
FAISS_THROW_IF_NOT
|
|
723
|
-
READVECTOR
|
|
791
|
+
auto& il = invlists[hash];
|
|
792
|
+
READVECTOR(il.ids);
|
|
793
|
+
FAISS_THROW_IF_NOT(il.ids.size() == ilsz);
|
|
794
|
+
READVECTOR(il.vecs);
|
|
724
795
|
}
|
|
725
796
|
}
|
|
726
797
|
|
|
727
798
|
static void read_binary_multi_hash_map(
|
|
728
|
-
IndexBinaryMultiHash::Map
|
|
729
|
-
int b,
|
|
730
|
-
|
|
731
|
-
{
|
|
799
|
+
IndexBinaryMultiHash::Map& map,
|
|
800
|
+
int b,
|
|
801
|
+
size_t ntotal,
|
|
802
|
+
IOReader* f) {
|
|
732
803
|
int id_bits;
|
|
733
804
|
size_t sz;
|
|
734
|
-
READ1
|
|
735
|
-
READ1
|
|
805
|
+
READ1(id_bits);
|
|
806
|
+
READ1(sz);
|
|
736
807
|
std::vector<uint8_t> buf;
|
|
737
|
-
READVECTOR
|
|
808
|
+
READVECTOR(buf);
|
|
738
809
|
size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
|
|
739
|
-
FAISS_THROW_IF_NOT
|
|
740
|
-
BitstringReader rd
|
|
741
|
-
map.reserve
|
|
810
|
+
FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8);
|
|
811
|
+
BitstringReader rd(buf.data(), buf.size());
|
|
812
|
+
map.reserve(sz);
|
|
742
813
|
for (size_t i = 0; i < sz; i++) {
|
|
743
814
|
uint64_t hash = rd.read(b);
|
|
744
815
|
uint64_t ilsz = rd.read(id_bits);
|
|
745
|
-
auto
|
|
816
|
+
auto& il = map[hash];
|
|
746
817
|
for (size_t j = 0; j < ilsz; j++) {
|
|
747
|
-
il.push_back
|
|
818
|
+
il.push_back(rd.read(id_bits));
|
|
748
819
|
}
|
|
749
820
|
}
|
|
750
821
|
}
|
|
751
822
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
755
|
-
IndexBinary * idx = nullptr;
|
|
823
|
+
IndexBinary* read_index_binary(IOReader* f, int io_flags) {
|
|
824
|
+
IndexBinary* idx = nullptr;
|
|
756
825
|
uint32_t h;
|
|
757
|
-
READ1
|
|
758
|
-
if (h == fourcc
|
|
759
|
-
IndexBinaryFlat
|
|
760
|
-
read_index_binary_header
|
|
761
|
-
READVECTOR
|
|
762
|
-
FAISS_THROW_IF_NOT
|
|
826
|
+
READ1(h);
|
|
827
|
+
if (h == fourcc("IBxF")) {
|
|
828
|
+
IndexBinaryFlat* idxf = new IndexBinaryFlat();
|
|
829
|
+
read_index_binary_header(idxf, f);
|
|
830
|
+
READVECTOR(idxf->xb);
|
|
831
|
+
FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size);
|
|
763
832
|
// leak!
|
|
764
833
|
idx = idxf;
|
|
765
|
-
} else if (h == fourcc
|
|
766
|
-
IndexBinaryIVF
|
|
767
|
-
read_binary_ivf_header
|
|
768
|
-
read_InvertedLists
|
|
834
|
+
} else if (h == fourcc("IBwF")) {
|
|
835
|
+
IndexBinaryIVF* ivf = new IndexBinaryIVF();
|
|
836
|
+
read_binary_ivf_header(ivf, f);
|
|
837
|
+
read_InvertedLists(ivf, f, io_flags);
|
|
769
838
|
idx = ivf;
|
|
770
|
-
} else if (h == fourcc
|
|
771
|
-
IndexBinaryFromFloat
|
|
772
|
-
read_index_binary_header
|
|
839
|
+
} else if (h == fourcc("IBFf")) {
|
|
840
|
+
IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat();
|
|
841
|
+
read_index_binary_header(idxff, f);
|
|
773
842
|
idxff->own_fields = true;
|
|
774
|
-
idxff->index = read_index
|
|
843
|
+
idxff->index = read_index(f, io_flags);
|
|
775
844
|
idx = idxff;
|
|
776
|
-
} else if (h == fourcc
|
|
777
|
-
IndexBinaryHNSW
|
|
778
|
-
read_index_binary_header
|
|
779
|
-
read_HNSW
|
|
780
|
-
idxhnsw->storage = read_index_binary
|
|
845
|
+
} else if (h == fourcc("IBHf")) {
|
|
846
|
+
IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW();
|
|
847
|
+
read_index_binary_header(idxhnsw, f);
|
|
848
|
+
read_HNSW(&idxhnsw->hnsw, f);
|
|
849
|
+
idxhnsw->storage = read_index_binary(f, io_flags);
|
|
781
850
|
idxhnsw->own_fields = true;
|
|
782
851
|
idx = idxhnsw;
|
|
783
|
-
} else if(h == fourcc
|
|
784
|
-
bool is_map2 = h == fourcc
|
|
785
|
-
IndexBinaryIDMap
|
|
786
|
-
|
|
787
|
-
read_index_binary_header
|
|
788
|
-
idxmap->index = read_index_binary
|
|
852
|
+
} else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
|
|
853
|
+
bool is_map2 = h == fourcc("IBM2");
|
|
854
|
+
IndexBinaryIDMap* idxmap =
|
|
855
|
+
is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap();
|
|
856
|
+
read_index_binary_header(idxmap, f);
|
|
857
|
+
idxmap->index = read_index_binary(f, io_flags);
|
|
789
858
|
idxmap->own_fields = true;
|
|
790
|
-
READVECTOR
|
|
859
|
+
READVECTOR(idxmap->id_map);
|
|
791
860
|
if (is_map2) {
|
|
792
|
-
static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map
|
|
861
|
+
static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map();
|
|
793
862
|
}
|
|
794
863
|
idx = idxmap;
|
|
795
|
-
} else if(h == fourcc("IBHh")) {
|
|
796
|
-
IndexBinaryHash
|
|
797
|
-
read_index_binary_header
|
|
798
|
-
READ1
|
|
799
|
-
READ1
|
|
864
|
+
} else if (h == fourcc("IBHh")) {
|
|
865
|
+
IndexBinaryHash* idxh = new IndexBinaryHash();
|
|
866
|
+
read_index_binary_header(idxh, f);
|
|
867
|
+
READ1(idxh->b);
|
|
868
|
+
READ1(idxh->nflip);
|
|
800
869
|
read_binary_hash_invlists(idxh->invlists, idxh->b, f);
|
|
801
870
|
idx = idxh;
|
|
802
|
-
} else if(h == fourcc("IBHm")) {
|
|
803
|
-
IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash
|
|
804
|
-
read_index_binary_header
|
|
805
|
-
idxmh->storage = dynamic_cast<IndexBinaryFlat*>
|
|
806
|
-
FAISS_THROW_IF_NOT(
|
|
871
|
+
} else if (h == fourcc("IBHm")) {
|
|
872
|
+
IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash();
|
|
873
|
+
read_index_binary_header(idxmh, f);
|
|
874
|
+
idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f));
|
|
875
|
+
FAISS_THROW_IF_NOT(
|
|
876
|
+
idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
|
|
807
877
|
idxmh->own_fields = true;
|
|
808
|
-
READ1
|
|
809
|
-
READ1
|
|
810
|
-
READ1
|
|
811
|
-
idxmh->maps.resize
|
|
878
|
+
READ1(idxmh->b);
|
|
879
|
+
READ1(idxmh->nhash);
|
|
880
|
+
READ1(idxmh->nflip);
|
|
881
|
+
idxmh->maps.resize(idxmh->nhash);
|
|
812
882
|
for (int i = 0; i < idxmh->nhash; i++) {
|
|
813
883
|
read_binary_multi_hash_map(
|
|
814
884
|
idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
|
|
@@ -816,25 +886,23 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
|
816
886
|
idx = idxmh;
|
|
817
887
|
} else {
|
|
818
888
|
FAISS_THROW_FMT(
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
889
|
+
"Index type %08x (\"%s\") not recognized",
|
|
890
|
+
h,
|
|
891
|
+
fourcc_inv_printable(h).c_str());
|
|
822
892
|
idx = nullptr;
|
|
823
893
|
}
|
|
824
894
|
return idx;
|
|
825
895
|
}
|
|
826
896
|
|
|
827
|
-
IndexBinary
|
|
897
|
+
IndexBinary* read_index_binary(FILE* f, int io_flags) {
|
|
828
898
|
FileIOReader reader(f);
|
|
829
899
|
return read_index_binary(&reader, io_flags);
|
|
830
900
|
}
|
|
831
901
|
|
|
832
|
-
IndexBinary
|
|
902
|
+
IndexBinary* read_index_binary(const char* fname, int io_flags) {
|
|
833
903
|
FileIOReader reader(fname);
|
|
834
|
-
IndexBinary
|
|
904
|
+
IndexBinary* idx = read_index_binary(&reader, io_flags);
|
|
835
905
|
return idx;
|
|
836
906
|
}
|
|
837
907
|
|
|
838
|
-
|
|
839
|
-
|
|
840
908
|
} // namespace faiss
|