faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -9,11 +9,13 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/index_io.h>
|
|
11
11
|
|
|
12
|
+
#include <faiss/impl/io_macros.h>
|
|
13
|
+
|
|
12
14
|
#include <cstdio>
|
|
13
15
|
#include <cstdlib>
|
|
14
16
|
|
|
15
|
-
#include <sys/types.h>
|
|
16
17
|
#include <sys/stat.h>
|
|
18
|
+
#include <sys/types.h>
|
|
17
19
|
|
|
18
20
|
#include <faiss/impl/FaissAssert.h>
|
|
19
21
|
#include <faiss/impl/io.h>
|
|
@@ -22,343 +24,434 @@
|
|
|
22
24
|
|
|
23
25
|
#include <faiss/invlists/InvertedListsIOHook.h>
|
|
24
26
|
|
|
27
|
+
#include <faiss/Index2Layer.h>
|
|
28
|
+
#include <faiss/IndexAdditiveQuantizer.h>
|
|
25
29
|
#include <faiss/IndexFlat.h>
|
|
26
|
-
#include <faiss/
|
|
27
|
-
#include <faiss/IndexPreTransform.h>
|
|
28
|
-
#include <faiss/IndexLSH.h>
|
|
29
|
-
#include <faiss/IndexPQ.h>
|
|
30
|
+
#include <faiss/IndexHNSW.h>
|
|
30
31
|
#include <faiss/IndexIVF.h>
|
|
32
|
+
#include <faiss/IndexIVFAdditiveQuantizer.h>
|
|
33
|
+
#include <faiss/IndexIVFFlat.h>
|
|
31
34
|
#include <faiss/IndexIVFPQ.h>
|
|
35
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
|
32
36
|
#include <faiss/IndexIVFPQR.h>
|
|
33
|
-
#include <faiss/Index2Layer.h>
|
|
34
|
-
#include <faiss/IndexIVFFlat.h>
|
|
35
37
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
36
|
-
#include <faiss/
|
|
37
|
-
#include <faiss/IndexScalarQuantizer.h>
|
|
38
|
-
#include <faiss/IndexHNSW.h>
|
|
38
|
+
#include <faiss/IndexLSH.h>
|
|
39
39
|
#include <faiss/IndexLattice.h>
|
|
40
|
+
#include <faiss/IndexNSG.h>
|
|
41
|
+
#include <faiss/IndexPQ.h>
|
|
40
42
|
#include <faiss/IndexPQFastScan.h>
|
|
41
|
-
#include <faiss/
|
|
43
|
+
#include <faiss/IndexPreTransform.h>
|
|
42
44
|
#include <faiss/IndexRefine.h>
|
|
45
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
46
|
+
#include <faiss/MetaIndexes.h>
|
|
47
|
+
#include <faiss/VectorTransform.h>
|
|
43
48
|
|
|
44
49
|
#include <faiss/IndexBinaryFlat.h>
|
|
45
50
|
#include <faiss/IndexBinaryFromFloat.h>
|
|
46
51
|
#include <faiss/IndexBinaryHNSW.h>
|
|
47
|
-
#include <faiss/IndexBinaryIVF.h>
|
|
48
52
|
#include <faiss/IndexBinaryHash.h>
|
|
53
|
+
#include <faiss/IndexBinaryIVF.h>
|
|
49
54
|
|
|
50
55
|
namespace faiss {
|
|
51
56
|
|
|
52
|
-
|
|
53
57
|
/*************************************************************
|
|
54
58
|
* Read
|
|
55
59
|
**************************************************************/
|
|
56
60
|
|
|
57
|
-
static void read_index_header
|
|
58
|
-
READ1
|
|
59
|
-
READ1
|
|
61
|
+
static void read_index_header(Index* idx, IOReader* f) {
|
|
62
|
+
READ1(idx->d);
|
|
63
|
+
READ1(idx->ntotal);
|
|
60
64
|
Index::idx_t dummy;
|
|
61
|
-
READ1
|
|
62
|
-
READ1
|
|
63
|
-
READ1
|
|
64
|
-
READ1
|
|
65
|
+
READ1(dummy);
|
|
66
|
+
READ1(dummy);
|
|
67
|
+
READ1(idx->is_trained);
|
|
68
|
+
READ1(idx->metric_type);
|
|
65
69
|
if (idx->metric_type > 1) {
|
|
66
|
-
READ1
|
|
70
|
+
READ1(idx->metric_arg);
|
|
67
71
|
}
|
|
68
72
|
idx->verbose = false;
|
|
69
73
|
}
|
|
70
74
|
|
|
71
|
-
VectorTransform* read_VectorTransform
|
|
75
|
+
VectorTransform* read_VectorTransform(IOReader* f) {
|
|
72
76
|
uint32_t h;
|
|
73
|
-
READ1
|
|
74
|
-
VectorTransform
|
|
75
|
-
|
|
76
|
-
if (h == fourcc
|
|
77
|
-
h == fourcc ("
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
PCAMatrix
|
|
85
|
-
READ1
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
77
|
+
READ1(h);
|
|
78
|
+
VectorTransform* vt = nullptr;
|
|
79
|
+
|
|
80
|
+
if (h == fourcc("rrot") || h == fourcc("PCAm") || h == fourcc("LTra") ||
|
|
81
|
+
h == fourcc("PcAm") || h == fourcc("Viqm") || h == fourcc("Pcam")) {
|
|
82
|
+
LinearTransform* lt = nullptr;
|
|
83
|
+
if (h == fourcc("rrot")) {
|
|
84
|
+
lt = new RandomRotationMatrix();
|
|
85
|
+
} else if (
|
|
86
|
+
h == fourcc("PCAm") || h == fourcc("PcAm") ||
|
|
87
|
+
h == fourcc("Pcam")) {
|
|
88
|
+
PCAMatrix* pca = new PCAMatrix();
|
|
89
|
+
READ1(pca->eigen_power);
|
|
90
|
+
if (h == fourcc("Pcam")) {
|
|
91
|
+
READ1(pca->epsilon);
|
|
92
|
+
}
|
|
93
|
+
READ1(pca->random_rotation);
|
|
94
|
+
if (h != fourcc("PCAm")) {
|
|
95
|
+
READ1(pca->balanced_bins);
|
|
96
|
+
}
|
|
97
|
+
READVECTOR(pca->mean);
|
|
98
|
+
READVECTOR(pca->eigenvalues);
|
|
99
|
+
READVECTOR(pca->PCAMat);
|
|
92
100
|
lt = pca;
|
|
93
|
-
} else if (h == fourcc
|
|
94
|
-
ITQMatrix
|
|
95
|
-
READ1
|
|
96
|
-
READ1
|
|
101
|
+
} else if (h == fourcc("Viqm")) {
|
|
102
|
+
ITQMatrix* itqm = new ITQMatrix();
|
|
103
|
+
READ1(itqm->max_iter);
|
|
104
|
+
READ1(itqm->seed);
|
|
97
105
|
lt = itqm;
|
|
98
|
-
} else if (h == fourcc
|
|
99
|
-
lt = new LinearTransform
|
|
106
|
+
} else if (h == fourcc("LTra")) {
|
|
107
|
+
lt = new LinearTransform();
|
|
100
108
|
}
|
|
101
|
-
READ1
|
|
102
|
-
READVECTOR
|
|
103
|
-
READVECTOR
|
|
104
|
-
FAISS_THROW_IF_NOT
|
|
105
|
-
FAISS_THROW_IF_NOT
|
|
109
|
+
READ1(lt->have_bias);
|
|
110
|
+
READVECTOR(lt->A);
|
|
111
|
+
READVECTOR(lt->b);
|
|
112
|
+
FAISS_THROW_IF_NOT(lt->A.size() >= lt->d_in * lt->d_out);
|
|
113
|
+
FAISS_THROW_IF_NOT(!lt->have_bias || lt->b.size() >= lt->d_out);
|
|
106
114
|
lt->set_is_orthonormal();
|
|
107
115
|
vt = lt;
|
|
108
|
-
} else if (h == fourcc
|
|
109
|
-
RemapDimensionsTransform
|
|
110
|
-
READVECTOR
|
|
116
|
+
} else if (h == fourcc("RmDT")) {
|
|
117
|
+
RemapDimensionsTransform* rdt = new RemapDimensionsTransform();
|
|
118
|
+
READVECTOR(rdt->map);
|
|
111
119
|
vt = rdt;
|
|
112
|
-
} else if (h == fourcc
|
|
113
|
-
NormalizationTransform
|
|
114
|
-
READ1
|
|
120
|
+
} else if (h == fourcc("VNrm")) {
|
|
121
|
+
NormalizationTransform* nt = new NormalizationTransform();
|
|
122
|
+
READ1(nt->norm);
|
|
115
123
|
vt = nt;
|
|
116
|
-
} else if (h == fourcc
|
|
117
|
-
CenteringTransform
|
|
118
|
-
READVECTOR
|
|
124
|
+
} else if (h == fourcc("VCnt")) {
|
|
125
|
+
CenteringTransform* ct = new CenteringTransform();
|
|
126
|
+
READVECTOR(ct->mean);
|
|
119
127
|
vt = ct;
|
|
120
|
-
} else if (h == fourcc
|
|
121
|
-
ITQTransform
|
|
128
|
+
} else if (h == fourcc("Viqt")) {
|
|
129
|
+
ITQTransform* itqt = new ITQTransform();
|
|
122
130
|
|
|
123
|
-
READVECTOR
|
|
124
|
-
READ1
|
|
131
|
+
READVECTOR(itqt->mean);
|
|
132
|
+
READ1(itqt->do_pca);
|
|
125
133
|
{
|
|
126
|
-
ITQMatrix
|
|
127
|
-
(read_VectorTransform (f));
|
|
134
|
+
ITQMatrix* itqm = dynamic_cast<ITQMatrix*>(read_VectorTransform(f));
|
|
128
135
|
FAISS_THROW_IF_NOT(itqm);
|
|
129
136
|
itqt->itq = *itqm;
|
|
130
137
|
delete itqm;
|
|
131
138
|
}
|
|
132
139
|
{
|
|
133
|
-
LinearTransform
|
|
134
|
-
|
|
135
|
-
FAISS_THROW_IF_NOT
|
|
140
|
+
LinearTransform* pi =
|
|
141
|
+
dynamic_cast<LinearTransform*>(read_VectorTransform(f));
|
|
142
|
+
FAISS_THROW_IF_NOT(pi);
|
|
136
143
|
itqt->pca_then_itq = *pi;
|
|
137
144
|
delete pi;
|
|
138
145
|
}
|
|
139
146
|
vt = itqt;
|
|
140
147
|
} else {
|
|
141
148
|
FAISS_THROW_FMT(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
149
|
+
"fourcc %ud (\"%s\") not recognized in %s",
|
|
150
|
+
h,
|
|
151
|
+
fourcc_inv_printable(h).c_str(),
|
|
152
|
+
f->name.c_str());
|
|
145
153
|
}
|
|
146
|
-
READ1
|
|
147
|
-
READ1
|
|
148
|
-
READ1
|
|
154
|
+
READ1(vt->d_in);
|
|
155
|
+
READ1(vt->d_out);
|
|
156
|
+
READ1(vt->is_trained);
|
|
149
157
|
return vt;
|
|
150
158
|
}
|
|
151
159
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
{
|
|
160
|
+
static void read_ArrayInvertedLists_sizes(
|
|
161
|
+
IOReader* f,
|
|
162
|
+
std::vector<size_t>& sizes) {
|
|
156
163
|
uint32_t list_type;
|
|
157
164
|
READ1(list_type);
|
|
158
165
|
if (list_type == fourcc("full")) {
|
|
159
166
|
size_t os = sizes.size();
|
|
160
|
-
READVECTOR
|
|
161
|
-
FAISS_THROW_IF_NOT
|
|
167
|
+
READVECTOR(sizes);
|
|
168
|
+
FAISS_THROW_IF_NOT(os == sizes.size());
|
|
162
169
|
} else if (list_type == fourcc("sprs")) {
|
|
163
170
|
std::vector<size_t> idsizes;
|
|
164
|
-
READVECTOR
|
|
171
|
+
READVECTOR(idsizes);
|
|
165
172
|
for (size_t j = 0; j < idsizes.size(); j += 2) {
|
|
166
|
-
FAISS_THROW_IF_NOT
|
|
173
|
+
FAISS_THROW_IF_NOT(idsizes[j] < sizes.size());
|
|
167
174
|
sizes[idsizes[j]] = idsizes[j + 1];
|
|
168
175
|
}
|
|
169
176
|
} else {
|
|
170
177
|
FAISS_THROW_FMT(
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
178
|
+
"list_type %ud (\"%s\") not recognized",
|
|
179
|
+
list_type,
|
|
180
|
+
fourcc_inv_printable(list_type).c_str());
|
|
174
181
|
}
|
|
175
182
|
}
|
|
176
183
|
|
|
177
|
-
InvertedLists
|
|
184
|
+
InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
|
|
178
185
|
uint32_t h;
|
|
179
|
-
READ1
|
|
180
|
-
if (h == fourcc
|
|
181
|
-
fprintf(stderr,
|
|
186
|
+
READ1(h);
|
|
187
|
+
if (h == fourcc("il00")) {
|
|
188
|
+
fprintf(stderr,
|
|
189
|
+
"read_InvertedLists:"
|
|
182
190
|
" WARN! inverted lists not stored with IVF object\n");
|
|
183
191
|
return nullptr;
|
|
184
|
-
} else if (h == fourcc
|
|
185
|
-
auto ails = new ArrayInvertedLists
|
|
186
|
-
READ1
|
|
187
|
-
READ1
|
|
188
|
-
ails->ids.resize
|
|
189
|
-
ails->codes.resize
|
|
190
|
-
std::vector<size_t> sizes
|
|
191
|
-
read_ArrayInvertedLists_sizes
|
|
192
|
+
} else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
193
|
+
auto ails = new ArrayInvertedLists(0, 0);
|
|
194
|
+
READ1(ails->nlist);
|
|
195
|
+
READ1(ails->code_size);
|
|
196
|
+
ails->ids.resize(ails->nlist);
|
|
197
|
+
ails->codes.resize(ails->nlist);
|
|
198
|
+
std::vector<size_t> sizes(ails->nlist);
|
|
199
|
+
read_ArrayInvertedLists_sizes(f, sizes);
|
|
192
200
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
193
|
-
ails->ids[i].resize
|
|
194
|
-
ails->codes[i].resize
|
|
201
|
+
ails->ids[i].resize(sizes[i]);
|
|
202
|
+
ails->codes[i].resize(sizes[i] * ails->code_size);
|
|
195
203
|
}
|
|
196
204
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
197
205
|
size_t n = ails->ids[i].size();
|
|
198
206
|
if (n > 0) {
|
|
199
|
-
READANDCHECK
|
|
200
|
-
READANDCHECK
|
|
207
|
+
READANDCHECK(ails->codes[i].data(), n * ails->code_size);
|
|
208
|
+
READANDCHECK(ails->ids[i].data(), n);
|
|
201
209
|
}
|
|
202
210
|
}
|
|
203
211
|
return ails;
|
|
204
212
|
|
|
205
|
-
} else if (h == fourcc
|
|
206
|
-
// code is always ilxx where xx is specific to the type of invlists we
|
|
207
|
-
// so we get the 16 high bits from the io_flag and the 16 low bits
|
|
213
|
+
} else if (h == fourcc("ilar") && (io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
214
|
+
// code is always ilxx where xx is specific to the type of invlists we
|
|
215
|
+
// want so we get the 16 high bits from the io_flag and the 16 low bits
|
|
216
|
+
// as "il"
|
|
208
217
|
int h2 = (io_flags & 0xffff0000) | (fourcc("il__") & 0x0000ffff);
|
|
209
218
|
size_t nlist, code_size;
|
|
210
|
-
READ1
|
|
211
|
-
READ1
|
|
212
|
-
std::vector<size_t> sizes
|
|
213
|
-
read_ArrayInvertedLists_sizes
|
|
219
|
+
READ1(nlist);
|
|
220
|
+
READ1(code_size);
|
|
221
|
+
std::vector<size_t> sizes(nlist);
|
|
222
|
+
read_ArrayInvertedLists_sizes(f, sizes);
|
|
214
223
|
return InvertedListsIOHook::lookup(h2)->read_ArrayInvertedLists(
|
|
215
224
|
f, io_flags, nlist, code_size, sizes);
|
|
216
225
|
} else {
|
|
217
226
|
return InvertedListsIOHook::lookup(h)->read(f, io_flags);
|
|
218
227
|
}
|
|
219
|
-
|
|
220
228
|
}
|
|
221
229
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
IndexIVF *ivf, IOReader *f, int io_flags) {
|
|
225
|
-
InvertedLists *ils = read_InvertedLists (f, io_flags);
|
|
230
|
+
static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
|
|
231
|
+
InvertedLists* ils = read_InvertedLists(f, io_flags);
|
|
226
232
|
if (ils) {
|
|
227
|
-
FAISS_THROW_IF_NOT
|
|
228
|
-
FAISS_THROW_IF_NOT
|
|
229
|
-
|
|
233
|
+
FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
|
|
234
|
+
FAISS_THROW_IF_NOT(
|
|
235
|
+
ils->code_size == InvertedLists::INVALID_CODE_SIZE ||
|
|
236
|
+
ils->code_size == ivf->code_size);
|
|
230
237
|
}
|
|
231
238
|
ivf->invlists = ils;
|
|
232
239
|
ivf->own_invlists = true;
|
|
233
240
|
}
|
|
234
241
|
|
|
235
|
-
static void read_ProductQuantizer
|
|
236
|
-
READ1
|
|
237
|
-
READ1
|
|
238
|
-
READ1
|
|
239
|
-
pq->set_derived_values
|
|
240
|
-
READVECTOR
|
|
242
|
+
static void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
|
|
243
|
+
READ1(pq->d);
|
|
244
|
+
READ1(pq->M);
|
|
245
|
+
READ1(pq->nbits);
|
|
246
|
+
pq->set_derived_values();
|
|
247
|
+
READVECTOR(pq->centroids);
|
|
241
248
|
}
|
|
242
249
|
|
|
243
|
-
static void
|
|
244
|
-
READ1
|
|
245
|
-
READ1
|
|
246
|
-
|
|
247
|
-
READ1
|
|
248
|
-
READ1
|
|
249
|
-
|
|
250
|
-
|
|
250
|
+
static void read_ResidualQuantizer_old(ResidualQuantizer* rq, IOReader* f) {
|
|
251
|
+
READ1(rq->d);
|
|
252
|
+
READ1(rq->M);
|
|
253
|
+
READVECTOR(rq->nbits);
|
|
254
|
+
READ1(rq->is_trained);
|
|
255
|
+
READ1(rq->train_type);
|
|
256
|
+
READ1(rq->max_beam_size);
|
|
257
|
+
READVECTOR(rq->codebooks);
|
|
258
|
+
READ1(rq->search_type);
|
|
259
|
+
READ1(rq->norm_min);
|
|
260
|
+
READ1(rq->norm_max);
|
|
261
|
+
rq->set_derived_values();
|
|
251
262
|
}
|
|
252
263
|
|
|
264
|
+
static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
|
|
265
|
+
READ1(aq->d);
|
|
266
|
+
READ1(aq->M);
|
|
267
|
+
READVECTOR(aq->nbits);
|
|
268
|
+
READ1(aq->is_trained);
|
|
269
|
+
READVECTOR(aq->codebooks);
|
|
270
|
+
READ1(aq->search_type);
|
|
271
|
+
READ1(aq->norm_min);
|
|
272
|
+
READ1(aq->norm_max);
|
|
273
|
+
if (aq->search_type == AdditiveQuantizer::ST_norm_cqint8 ||
|
|
274
|
+
aq->search_type == AdditiveQuantizer::ST_norm_cqint4) {
|
|
275
|
+
READXBVECTOR(aq->qnorm.codes);
|
|
276
|
+
}
|
|
277
|
+
aq->set_derived_values();
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
static void read_ResidualQuantizer(ResidualQuantizer* rq, IOReader* f) {
|
|
281
|
+
read_AdditiveQuantizer(rq, f);
|
|
282
|
+
READ1(rq->train_type);
|
|
283
|
+
READ1(rq->max_beam_size);
|
|
284
|
+
if (!(rq->train_type & ResidualQuantizer::Skip_codebook_tables)) {
|
|
285
|
+
rq->compute_codebook_tables();
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
static void read_LocalSearchQuantizer(LocalSearchQuantizer* lsq, IOReader* f) {
|
|
290
|
+
read_AdditiveQuantizer(lsq, f);
|
|
291
|
+
READ1(lsq->K);
|
|
292
|
+
READ1(lsq->train_iters);
|
|
293
|
+
READ1(lsq->encode_ils_iters);
|
|
294
|
+
READ1(lsq->train_ils_iters);
|
|
295
|
+
READ1(lsq->icm_iters);
|
|
296
|
+
READ1(lsq->p);
|
|
297
|
+
READ1(lsq->lambd);
|
|
298
|
+
READ1(lsq->chunk_size);
|
|
299
|
+
READ1(lsq->random_seed);
|
|
300
|
+
READ1(lsq->nperts);
|
|
301
|
+
READ1(lsq->update_codebooks_with_double);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
static void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
|
|
305
|
+
READ1(ivsc->qtype);
|
|
306
|
+
READ1(ivsc->rangestat);
|
|
307
|
+
READ1(ivsc->rangestat_arg);
|
|
308
|
+
READ1(ivsc->d);
|
|
309
|
+
READ1(ivsc->code_size);
|
|
310
|
+
READVECTOR(ivsc->trained);
|
|
311
|
+
ivsc->set_derived_sizes();
|
|
312
|
+
}
|
|
253
313
|
|
|
254
|
-
static void read_HNSW
|
|
255
|
-
READVECTOR
|
|
256
|
-
READVECTOR
|
|
257
|
-
READVECTOR
|
|
258
|
-
READVECTOR
|
|
259
|
-
READVECTOR
|
|
314
|
+
static void read_HNSW(HNSW* hnsw, IOReader* f) {
|
|
315
|
+
READVECTOR(hnsw->assign_probas);
|
|
316
|
+
READVECTOR(hnsw->cum_nneighbor_per_level);
|
|
317
|
+
READVECTOR(hnsw->levels);
|
|
318
|
+
READVECTOR(hnsw->offsets);
|
|
319
|
+
READVECTOR(hnsw->neighbors);
|
|
320
|
+
|
|
321
|
+
READ1(hnsw->entry_point);
|
|
322
|
+
READ1(hnsw->max_level);
|
|
323
|
+
READ1(hnsw->efConstruction);
|
|
324
|
+
READ1(hnsw->efSearch);
|
|
325
|
+
READ1(hnsw->upper_beam);
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
static void read_NSG(NSG* nsg, IOReader* f) {
|
|
329
|
+
READ1(nsg->ntotal);
|
|
330
|
+
READ1(nsg->R);
|
|
331
|
+
READ1(nsg->L);
|
|
332
|
+
READ1(nsg->C);
|
|
333
|
+
READ1(nsg->search_L);
|
|
334
|
+
READ1(nsg->enterpoint);
|
|
335
|
+
READ1(nsg->is_built);
|
|
336
|
+
|
|
337
|
+
if (!nsg->is_built) {
|
|
338
|
+
return;
|
|
339
|
+
}
|
|
260
340
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
341
|
+
constexpr int EMPTY_ID = -1;
|
|
342
|
+
int N = nsg->ntotal;
|
|
343
|
+
int R = nsg->R;
|
|
344
|
+
auto& graph = nsg->final_graph;
|
|
345
|
+
graph = std::make_shared<nsg::Graph<int>>(N, R);
|
|
346
|
+
std::fill_n(graph->data, N * R, EMPTY_ID);
|
|
347
|
+
|
|
348
|
+
int size = 0;
|
|
349
|
+
|
|
350
|
+
for (int i = 0; i < N; i++) {
|
|
351
|
+
for (int j = 0; j < R + 1; j++) {
|
|
352
|
+
int id;
|
|
353
|
+
READ1(id);
|
|
354
|
+
if (id != EMPTY_ID) {
|
|
355
|
+
graph->at(i, j) = id;
|
|
356
|
+
size += 1;
|
|
357
|
+
} else {
|
|
358
|
+
break;
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
}
|
|
266
362
|
}
|
|
267
363
|
|
|
268
|
-
ProductQuantizer
|
|
364
|
+
ProductQuantizer* read_ProductQuantizer(const char* fname) {
|
|
269
365
|
FileIOReader reader(fname);
|
|
270
366
|
return read_ProductQuantizer(&reader);
|
|
271
367
|
}
|
|
272
368
|
|
|
273
|
-
ProductQuantizer
|
|
274
|
-
|
|
275
|
-
|
|
369
|
+
ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
|
|
370
|
+
ProductQuantizer* pq = new ProductQuantizer();
|
|
371
|
+
ScopeDeleter1<ProductQuantizer> del(pq);
|
|
276
372
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
373
|
+
read_ProductQuantizer(pq, reader);
|
|
374
|
+
del.release();
|
|
375
|
+
return pq;
|
|
280
376
|
}
|
|
281
377
|
|
|
282
|
-
static void read_direct_map
|
|
378
|
+
static void read_direct_map(DirectMap* dm, IOReader* f) {
|
|
283
379
|
char maintain_direct_map;
|
|
284
|
-
READ1
|
|
380
|
+
READ1(maintain_direct_map);
|
|
285
381
|
dm->type = (DirectMap::Type)maintain_direct_map;
|
|
286
|
-
READVECTOR
|
|
382
|
+
READVECTOR(dm->array);
|
|
287
383
|
if (dm->type == DirectMap::Hashtable) {
|
|
288
384
|
using idx_t = Index::idx_t;
|
|
289
385
|
std::vector<std::pair<idx_t, idx_t>> v;
|
|
290
|
-
READVECTOR
|
|
291
|
-
std::unordered_map<idx_t, idx_t
|
|
292
|
-
map.reserve
|
|
293
|
-
for (auto it: v) {
|
|
294
|
-
map
|
|
386
|
+
READVECTOR(v);
|
|
387
|
+
std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
|
|
388
|
+
map.reserve(v.size());
|
|
389
|
+
for (auto it : v) {
|
|
390
|
+
map[it.first] = it.second;
|
|
295
391
|
}
|
|
296
392
|
}
|
|
297
|
-
|
|
298
393
|
}
|
|
299
394
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
READ1
|
|
307
|
-
|
|
308
|
-
ivf->quantizer = read_index (f);
|
|
395
|
+
static void read_ivf_header(
|
|
396
|
+
IndexIVF* ivf,
|
|
397
|
+
IOReader* f,
|
|
398
|
+
std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
|
|
399
|
+
read_index_header(ivf, f);
|
|
400
|
+
READ1(ivf->nlist);
|
|
401
|
+
READ1(ivf->nprobe);
|
|
402
|
+
ivf->quantizer = read_index(f);
|
|
309
403
|
ivf->own_fields = true;
|
|
310
404
|
if (ids) { // used in legacy "Iv" formats
|
|
311
|
-
ids->resize
|
|
405
|
+
ids->resize(ivf->nlist);
|
|
312
406
|
for (size_t i = 0; i < ivf->nlist; i++)
|
|
313
|
-
READVECTOR
|
|
407
|
+
READVECTOR((*ids)[i]);
|
|
314
408
|
}
|
|
315
|
-
read_direct_map
|
|
409
|
+
read_direct_map(&ivf->direct_map, f);
|
|
316
410
|
}
|
|
317
411
|
|
|
318
412
|
// used for legacy formats
|
|
319
|
-
static ArrayInvertedLists
|
|
320
|
-
|
|
321
|
-
{
|
|
322
|
-
ArrayInvertedLists
|
|
323
|
-
|
|
324
|
-
std::swap
|
|
413
|
+
static ArrayInvertedLists* set_array_invlist(
|
|
414
|
+
IndexIVF* ivf,
|
|
415
|
+
std::vector<std::vector<Index::idx_t>>& ids) {
|
|
416
|
+
ArrayInvertedLists* ail =
|
|
417
|
+
new ArrayInvertedLists(ivf->nlist, ivf->code_size);
|
|
418
|
+
std::swap(ail->ids, ids);
|
|
325
419
|
ivf->invlists = ail;
|
|
326
420
|
ivf->own_invlists = true;
|
|
327
421
|
return ail;
|
|
328
422
|
}
|
|
329
423
|
|
|
330
|
-
static IndexIVFPQ
|
|
331
|
-
|
|
332
|
-
bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
|
|
424
|
+
static IndexIVFPQ* read_ivfpq(IOReader* f, uint32_t h, int io_flags) {
|
|
425
|
+
bool legacy = h == fourcc("IvQR") || h == fourcc("IvPQ");
|
|
333
426
|
|
|
334
|
-
IndexIVFPQR
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
IndexIVFPQ
|
|
427
|
+
IndexIVFPQR* ivfpqr = h == fourcc("IvQR") || h == fourcc("IwQR")
|
|
428
|
+
? new IndexIVFPQR()
|
|
429
|
+
: nullptr;
|
|
430
|
+
IndexIVFPQ* ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ();
|
|
338
431
|
|
|
339
|
-
std::vector<std::vector<Index::idx_t
|
|
340
|
-
read_ivf_header
|
|
341
|
-
READ1
|
|
342
|
-
READ1
|
|
343
|
-
read_ProductQuantizer
|
|
432
|
+
std::vector<std::vector<Index::idx_t>> ids;
|
|
433
|
+
read_ivf_header(ivpq, f, legacy ? &ids : nullptr);
|
|
434
|
+
READ1(ivpq->by_residual);
|
|
435
|
+
READ1(ivpq->code_size);
|
|
436
|
+
read_ProductQuantizer(&ivpq->pq, f);
|
|
344
437
|
|
|
345
438
|
if (legacy) {
|
|
346
|
-
ArrayInvertedLists
|
|
439
|
+
ArrayInvertedLists* ail = set_array_invlist(ivpq, ids);
|
|
347
440
|
for (size_t i = 0; i < ail->nlist; i++)
|
|
348
|
-
READVECTOR
|
|
441
|
+
READVECTOR(ail->codes[i]);
|
|
349
442
|
} else {
|
|
350
|
-
read_InvertedLists
|
|
443
|
+
read_InvertedLists(ivpq, f, io_flags);
|
|
351
444
|
}
|
|
352
445
|
|
|
353
446
|
if (ivpq->is_trained) {
|
|
354
447
|
// precomputed table not stored. It is cheaper to recompute it
|
|
355
448
|
ivpq->use_precomputed_table = 0;
|
|
356
449
|
if (ivpq->by_residual)
|
|
357
|
-
ivpq->precompute_table
|
|
450
|
+
ivpq->precompute_table();
|
|
358
451
|
if (ivfpqr) {
|
|
359
|
-
read_ProductQuantizer
|
|
360
|
-
READVECTOR
|
|
361
|
-
READ1
|
|
452
|
+
read_ProductQuantizer(&ivfpqr->refine_pq, f);
|
|
453
|
+
READVECTOR(ivfpqr->refine_codes);
|
|
454
|
+
READ1(ivfpqr->k_factor);
|
|
362
455
|
}
|
|
363
456
|
}
|
|
364
457
|
return ivpq;
|
|
@@ -366,200 +459,248 @@ static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
|
|
|
366
459
|
|
|
367
460
|
int read_old_fmt_hack = 0;
|
|
368
461
|
|
|
369
|
-
Index
|
|
370
|
-
Index
|
|
462
|
+
Index* read_index(IOReader* f, int io_flags) {
|
|
463
|
+
Index* idx = nullptr;
|
|
371
464
|
uint32_t h;
|
|
372
|
-
READ1
|
|
373
|
-
if (h == fourcc
|
|
374
|
-
IndexFlat
|
|
375
|
-
if (h == fourcc
|
|
376
|
-
idxf = new IndexFlatIP
|
|
465
|
+
READ1(h);
|
|
466
|
+
if (h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
|
|
467
|
+
IndexFlat* idxf;
|
|
468
|
+
if (h == fourcc("IxFI")) {
|
|
469
|
+
idxf = new IndexFlatIP();
|
|
377
470
|
} else if (h == fourcc("IxF2")) {
|
|
378
|
-
idxf = new IndexFlatL2
|
|
471
|
+
idxf = new IndexFlatL2();
|
|
379
472
|
} else {
|
|
380
|
-
idxf = new IndexFlat
|
|
473
|
+
idxf = new IndexFlat();
|
|
381
474
|
}
|
|
382
|
-
read_index_header
|
|
383
|
-
|
|
384
|
-
|
|
475
|
+
read_index_header(idxf, f);
|
|
476
|
+
idxf->code_size = idxf->d * sizeof(float);
|
|
477
|
+
READXBVECTOR(idxf->codes);
|
|
478
|
+
FAISS_THROW_IF_NOT(
|
|
479
|
+
idxf->codes.size() == idxf->ntotal * idxf->code_size);
|
|
385
480
|
// leak!
|
|
386
481
|
idx = idxf;
|
|
387
482
|
} else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
|
|
388
|
-
IndexLSH
|
|
389
|
-
read_index_header
|
|
390
|
-
READ1
|
|
391
|
-
READ1
|
|
392
|
-
READ1
|
|
393
|
-
READVECTOR
|
|
394
|
-
|
|
483
|
+
IndexLSH* idxl = new IndexLSH();
|
|
484
|
+
read_index_header(idxl, f);
|
|
485
|
+
READ1(idxl->nbits);
|
|
486
|
+
READ1(idxl->rotate_data);
|
|
487
|
+
READ1(idxl->train_thresholds);
|
|
488
|
+
READVECTOR(idxl->thresholds);
|
|
489
|
+
int code_size_i;
|
|
490
|
+
READ1(code_size_i);
|
|
491
|
+
idxl->code_size = code_size_i;
|
|
395
492
|
if (h == fourcc("IxHE")) {
|
|
396
|
-
FAISS_THROW_IF_NOT_FMT
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
493
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
494
|
+
idxl->nbits % 64 == 0,
|
|
495
|
+
"can only read old format IndexLSH with "
|
|
496
|
+
"nbits multiple of 64 (got %d)",
|
|
497
|
+
(int)idxl->nbits);
|
|
400
498
|
// leak
|
|
401
|
-
idxl->
|
|
499
|
+
idxl->code_size *= 8;
|
|
402
500
|
}
|
|
403
501
|
{
|
|
404
|
-
RandomRotationMatrix
|
|
405
|
-
|
|
502
|
+
RandomRotationMatrix* rrot = dynamic_cast<RandomRotationMatrix*>(
|
|
503
|
+
read_VectorTransform(f));
|
|
406
504
|
FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
|
|
407
505
|
idxl->rrot = *rrot;
|
|
408
506
|
delete rrot;
|
|
409
507
|
}
|
|
410
|
-
READVECTOR
|
|
411
|
-
FAISS_THROW_IF_NOT
|
|
412
|
-
|
|
413
|
-
FAISS_THROW_IF_NOT
|
|
414
|
-
|
|
508
|
+
READVECTOR(idxl->codes);
|
|
509
|
+
FAISS_THROW_IF_NOT(
|
|
510
|
+
idxl->rrot.d_in == idxl->d && idxl->rrot.d_out == idxl->nbits);
|
|
511
|
+
FAISS_THROW_IF_NOT(
|
|
512
|
+
idxl->codes.size() == idxl->ntotal * idxl->code_size);
|
|
415
513
|
idx = idxl;
|
|
416
|
-
} else if (
|
|
417
|
-
|
|
514
|
+
} else if (
|
|
515
|
+
h == fourcc("IxPQ") || h == fourcc("IxPo") || h == fourcc("IxPq")) {
|
|
418
516
|
// IxPQ and IxPo were merged into the same IndexPQ object
|
|
419
|
-
IndexPQ
|
|
420
|
-
read_index_header
|
|
421
|
-
read_ProductQuantizer
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
READ1
|
|
426
|
-
READ1
|
|
517
|
+
IndexPQ* idxp = new IndexPQ();
|
|
518
|
+
read_index_header(idxp, f);
|
|
519
|
+
read_ProductQuantizer(&idxp->pq, f);
|
|
520
|
+
idxp->code_size = idxp->pq.code_size;
|
|
521
|
+
READVECTOR(idxp->codes);
|
|
522
|
+
if (h == fourcc("IxPo") || h == fourcc("IxPq")) {
|
|
523
|
+
READ1(idxp->search_type);
|
|
524
|
+
READ1(idxp->encode_signs);
|
|
525
|
+
READ1(idxp->polysemous_ht);
|
|
427
526
|
}
|
|
428
527
|
// Old versions of PQ all had metric_type set to INNER_PRODUCT
|
|
429
528
|
// when they were in fact using L2. Therefore, we force metric type
|
|
430
529
|
// to L2 when the old format is detected
|
|
431
|
-
if (h == fourcc
|
|
530
|
+
if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
|
|
432
531
|
idxp->metric_type = METRIC_L2;
|
|
433
532
|
}
|
|
434
533
|
idx = idxp;
|
|
435
|
-
} else if (h == fourcc
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
534
|
+
} else if (h == fourcc("IxRQ") || h == fourcc("IxRq")) {
|
|
535
|
+
IndexResidualQuantizer* idxr = new IndexResidualQuantizer();
|
|
536
|
+
read_index_header(idxr, f);
|
|
537
|
+
if (h == fourcc("IxRQ")) {
|
|
538
|
+
read_ResidualQuantizer_old(&idxr->rq, f);
|
|
539
|
+
} else {
|
|
540
|
+
read_ResidualQuantizer(&idxr->rq, f);
|
|
541
|
+
}
|
|
542
|
+
READ1(idxr->code_size);
|
|
543
|
+
READVECTOR(idxr->codes);
|
|
544
|
+
idx = idxr;
|
|
545
|
+
} else if (h == fourcc("IxLS")) {
|
|
546
|
+
auto idxr = new IndexLocalSearchQuantizer();
|
|
547
|
+
read_index_header(idxr, f);
|
|
548
|
+
read_LocalSearchQuantizer(&idxr->lsq, f);
|
|
549
|
+
READ1(idxr->code_size);
|
|
550
|
+
READVECTOR(idxr->codes);
|
|
551
|
+
idx = idxr;
|
|
552
|
+
} else if (h == fourcc("ImRQ")) {
|
|
553
|
+
ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
|
|
554
|
+
read_index_header(idxr, f);
|
|
555
|
+
read_ResidualQuantizer(&idxr->rq, f);
|
|
556
|
+
READ1(idxr->beam_factor);
|
|
557
|
+
idxr->set_beam_factor(idxr->beam_factor);
|
|
558
|
+
idx = idxr;
|
|
559
|
+
} else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
|
|
560
|
+
IndexIVFFlat* ivfl = new IndexIVFFlat();
|
|
561
|
+
std::vector<std::vector<Index::idx_t>> ids;
|
|
562
|
+
read_ivf_header(ivfl, f, &ids);
|
|
439
563
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
440
|
-
ArrayInvertedLists
|
|
564
|
+
ArrayInvertedLists* ail = set_array_invlist(ivfl, ids);
|
|
441
565
|
|
|
442
|
-
if (h == fourcc
|
|
566
|
+
if (h == fourcc("IvFL")) {
|
|
443
567
|
for (size_t i = 0; i < ivfl->nlist; i++) {
|
|
444
|
-
READVECTOR
|
|
568
|
+
READVECTOR(ail->codes[i]);
|
|
445
569
|
}
|
|
446
570
|
} else { // old format
|
|
447
571
|
for (size_t i = 0; i < ivfl->nlist; i++) {
|
|
448
572
|
std::vector<float> vec;
|
|
449
|
-
READVECTOR
|
|
573
|
+
READVECTOR(vec);
|
|
450
574
|
ail->codes[i].resize(vec.size() * sizeof(float));
|
|
451
|
-
memcpy(ail->codes[i].data(), vec.data(),
|
|
452
|
-
ail->codes[i].size());
|
|
575
|
+
memcpy(ail->codes[i].data(), vec.data(), ail->codes[i].size());
|
|
453
576
|
}
|
|
454
577
|
}
|
|
455
578
|
idx = ivfl;
|
|
456
|
-
} else if (h == fourcc
|
|
457
|
-
IndexIVFFlatDedup
|
|
458
|
-
read_ivf_header
|
|
579
|
+
} else if (h == fourcc("IwFd")) {
|
|
580
|
+
IndexIVFFlatDedup* ivfl = new IndexIVFFlatDedup();
|
|
581
|
+
read_ivf_header(ivfl, f);
|
|
459
582
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
460
583
|
{
|
|
461
584
|
std::vector<Index::idx_t> tab;
|
|
462
|
-
READVECTOR
|
|
585
|
+
READVECTOR(tab);
|
|
463
586
|
for (long i = 0; i < tab.size(); i += 2) {
|
|
464
|
-
std::pair<Index::idx_t, Index::idx_t>
|
|
465
|
-
|
|
466
|
-
ivfl->instances.insert (pair);
|
|
587
|
+
std::pair<Index::idx_t, Index::idx_t> pair(tab[i], tab[i + 1]);
|
|
588
|
+
ivfl->instances.insert(pair);
|
|
467
589
|
}
|
|
468
590
|
}
|
|
469
|
-
read_InvertedLists
|
|
591
|
+
read_InvertedLists(ivfl, f, io_flags);
|
|
470
592
|
idx = ivfl;
|
|
471
|
-
} else if (h == fourcc
|
|
472
|
-
IndexIVFFlat
|
|
473
|
-
read_ivf_header
|
|
593
|
+
} else if (h == fourcc("IwFl")) {
|
|
594
|
+
IndexIVFFlat* ivfl = new IndexIVFFlat();
|
|
595
|
+
read_ivf_header(ivfl, f);
|
|
474
596
|
ivfl->code_size = ivfl->d * sizeof(float);
|
|
475
|
-
read_InvertedLists
|
|
597
|
+
read_InvertedLists(ivfl, f, io_flags);
|
|
476
598
|
idx = ivfl;
|
|
477
|
-
} else if (h == fourcc
|
|
478
|
-
IndexScalarQuantizer
|
|
479
|
-
read_index_header
|
|
480
|
-
read_ScalarQuantizer
|
|
481
|
-
READVECTOR
|
|
599
|
+
} else if (h == fourcc("IxSQ")) {
|
|
600
|
+
IndexScalarQuantizer* idxs = new IndexScalarQuantizer();
|
|
601
|
+
read_index_header(idxs, f);
|
|
602
|
+
read_ScalarQuantizer(&idxs->sq, f);
|
|
603
|
+
READVECTOR(idxs->codes);
|
|
482
604
|
idxs->code_size = idxs->sq.code_size;
|
|
483
605
|
idx = idxs;
|
|
484
|
-
} else if (h == fourcc
|
|
606
|
+
} else if (h == fourcc("IxLa")) {
|
|
485
607
|
int d, nsq, scale_nbit, r2;
|
|
486
|
-
READ1
|
|
487
|
-
READ1
|
|
488
|
-
READ1
|
|
489
|
-
READ1
|
|
490
|
-
IndexLattice
|
|
491
|
-
read_index_header
|
|
492
|
-
READVECTOR
|
|
608
|
+
READ1(d);
|
|
609
|
+
READ1(nsq);
|
|
610
|
+
READ1(scale_nbit);
|
|
611
|
+
READ1(r2);
|
|
612
|
+
IndexLattice* idxl = new IndexLattice(d, nsq, scale_nbit, r2);
|
|
613
|
+
read_index_header(idxl, f);
|
|
614
|
+
READVECTOR(idxl->trained);
|
|
493
615
|
idx = idxl;
|
|
494
|
-
} else if(h == fourcc
|
|
495
|
-
IndexIVFScalarQuantizer
|
|
496
|
-
std::vector<std::vector<Index::idx_t
|
|
497
|
-
read_ivf_header
|
|
498
|
-
read_ScalarQuantizer
|
|
499
|
-
READ1
|
|
500
|
-
ArrayInvertedLists
|
|
501
|
-
for(int i = 0; i < ivsc->nlist; i++)
|
|
502
|
-
READVECTOR
|
|
616
|
+
} else if (h == fourcc("IvSQ")) { // legacy
|
|
617
|
+
IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
|
|
618
|
+
std::vector<std::vector<Index::idx_t>> ids;
|
|
619
|
+
read_ivf_header(ivsc, f, &ids);
|
|
620
|
+
read_ScalarQuantizer(&ivsc->sq, f);
|
|
621
|
+
READ1(ivsc->code_size);
|
|
622
|
+
ArrayInvertedLists* ail = set_array_invlist(ivsc, ids);
|
|
623
|
+
for (int i = 0; i < ivsc->nlist; i++)
|
|
624
|
+
READVECTOR(ail->codes[i]);
|
|
503
625
|
idx = ivsc;
|
|
504
|
-
} else if(h == fourcc
|
|
505
|
-
IndexIVFScalarQuantizer
|
|
506
|
-
read_ivf_header
|
|
507
|
-
read_ScalarQuantizer
|
|
508
|
-
READ1
|
|
509
|
-
if (h == fourcc
|
|
626
|
+
} else if (h == fourcc("IwSQ") || h == fourcc("IwSq")) {
|
|
627
|
+
IndexIVFScalarQuantizer* ivsc = new IndexIVFScalarQuantizer();
|
|
628
|
+
read_ivf_header(ivsc, f);
|
|
629
|
+
read_ScalarQuantizer(&ivsc->sq, f);
|
|
630
|
+
READ1(ivsc->code_size);
|
|
631
|
+
if (h == fourcc("IwSQ")) {
|
|
510
632
|
ivsc->by_residual = true;
|
|
511
633
|
} else {
|
|
512
|
-
READ1
|
|
634
|
+
READ1(ivsc->by_residual);
|
|
513
635
|
}
|
|
514
|
-
read_InvertedLists
|
|
636
|
+
read_InvertedLists(ivsc, f, io_flags);
|
|
515
637
|
idx = ivsc;
|
|
516
|
-
} else if(h == fourcc ("
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
638
|
+
} else if (h == fourcc("IwLS") || h == fourcc("IwRQ")) {
|
|
639
|
+
bool is_LSQ = h == fourcc("IwLS");
|
|
640
|
+
IndexIVFAdditiveQuantizer* iva;
|
|
641
|
+
if (is_LSQ) {
|
|
642
|
+
iva = new IndexIVFLocalSearchQuantizer();
|
|
643
|
+
} else {
|
|
644
|
+
iva = new IndexIVFResidualQuantizer();
|
|
645
|
+
}
|
|
646
|
+
read_ivf_header(iva, f);
|
|
647
|
+
READ1(iva->code_size);
|
|
648
|
+
if (is_LSQ) {
|
|
649
|
+
read_LocalSearchQuantizer((LocalSearchQuantizer*)iva->aq, f);
|
|
650
|
+
} else {
|
|
651
|
+
read_ResidualQuantizer((ResidualQuantizer*)iva->aq, f);
|
|
652
|
+
}
|
|
653
|
+
READ1(iva->by_residual);
|
|
654
|
+
READ1(iva->use_precomputed_table);
|
|
655
|
+
read_InvertedLists(iva, f, io_flags);
|
|
656
|
+
idx = iva;
|
|
657
|
+
} else if (h == fourcc("IwSh")) {
|
|
658
|
+
IndexIVFSpectralHash* ivsp = new IndexIVFSpectralHash();
|
|
659
|
+
read_ivf_header(ivsp, f);
|
|
660
|
+
ivsp->vt = read_VectorTransform(f);
|
|
520
661
|
ivsp->own_fields = true;
|
|
521
|
-
READ1
|
|
662
|
+
READ1(ivsp->nbit);
|
|
522
663
|
// not stored by write_ivf_header
|
|
523
664
|
ivsp->code_size = (ivsp->nbit + 7) / 8;
|
|
524
|
-
READ1
|
|
525
|
-
READ1
|
|
526
|
-
READVECTOR
|
|
527
|
-
read_InvertedLists
|
|
665
|
+
READ1(ivsp->period);
|
|
666
|
+
READ1(ivsp->threshold_type);
|
|
667
|
+
READVECTOR(ivsp->trained);
|
|
668
|
+
read_InvertedLists(ivsp, f, io_flags);
|
|
528
669
|
idx = ivsp;
|
|
529
|
-
} else if
|
|
530
|
-
|
|
670
|
+
} else if (
|
|
671
|
+
h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
|
|
672
|
+
h == fourcc("IwQR")) {
|
|
673
|
+
idx = read_ivfpq(f, h, io_flags);
|
|
531
674
|
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
} else if(h == fourcc ("IxPT")) {
|
|
535
|
-
IndexPreTransform * ixpt = new IndexPreTransform();
|
|
675
|
+
} else if (h == fourcc("IxPT")) {
|
|
676
|
+
IndexPreTransform* ixpt = new IndexPreTransform();
|
|
536
677
|
ixpt->own_fields = true;
|
|
537
|
-
read_index_header
|
|
678
|
+
read_index_header(ixpt, f);
|
|
538
679
|
int nt;
|
|
539
680
|
if (read_old_fmt_hack == 2) {
|
|
540
681
|
nt = 1;
|
|
541
682
|
} else {
|
|
542
|
-
READ1
|
|
683
|
+
READ1(nt);
|
|
543
684
|
}
|
|
544
685
|
for (int i = 0; i < nt; i++) {
|
|
545
|
-
ixpt->chain.push_back
|
|
686
|
+
ixpt->chain.push_back(read_VectorTransform(f));
|
|
546
687
|
}
|
|
547
|
-
ixpt->index = read_index
|
|
688
|
+
ixpt->index = read_index(f, io_flags);
|
|
548
689
|
idx = ixpt;
|
|
549
|
-
} else if(h == fourcc
|
|
550
|
-
MultiIndexQuantizer
|
|
551
|
-
read_index_header
|
|
552
|
-
read_ProductQuantizer
|
|
690
|
+
} else if (h == fourcc("Imiq")) {
|
|
691
|
+
MultiIndexQuantizer* imiq = new MultiIndexQuantizer();
|
|
692
|
+
read_index_header(imiq, f);
|
|
693
|
+
read_ProductQuantizer(&imiq->pq, f);
|
|
553
694
|
idx = imiq;
|
|
554
|
-
} else if(h == fourcc
|
|
555
|
-
IndexRefine
|
|
556
|
-
read_index_header
|
|
695
|
+
} else if (h == fourcc("IxRF")) {
|
|
696
|
+
IndexRefine* idxrf = new IndexRefine();
|
|
697
|
+
read_index_header(idxrf, f);
|
|
557
698
|
idxrf->base_index = read_index(f, io_flags);
|
|
558
699
|
idxrf->refine_index = read_index(f, io_flags);
|
|
559
|
-
READ1
|
|
700
|
+
READ1(idxrf->k_factor);
|
|
560
701
|
if (dynamic_cast<IndexFlat*>(idxrf->refine_index)) {
|
|
561
702
|
// then make a RefineFlat with it
|
|
562
|
-
IndexRefine
|
|
703
|
+
IndexRefine* idxrf_old = idxrf;
|
|
563
704
|
idxrf = new IndexRefineFlat();
|
|
564
705
|
*idxrf = *idxrf_old;
|
|
565
706
|
delete idxrf_old;
|
|
@@ -567,248 +708,260 @@ Index *read_index (IOReader *f, int io_flags) {
|
|
|
567
708
|
idxrf->own_fields = true;
|
|
568
709
|
idxrf->own_refine_index = true;
|
|
569
710
|
idx = idxrf;
|
|
570
|
-
} else if(h == fourcc
|
|
571
|
-
bool is_map2 = h == fourcc
|
|
572
|
-
IndexIDMap
|
|
573
|
-
read_index_header
|
|
574
|
-
idxmap->index = read_index
|
|
711
|
+
} else if (h == fourcc("IxMp") || h == fourcc("IxM2")) {
|
|
712
|
+
bool is_map2 = h == fourcc("IxM2");
|
|
713
|
+
IndexIDMap* idxmap = is_map2 ? new IndexIDMap2() : new IndexIDMap();
|
|
714
|
+
read_index_header(idxmap, f);
|
|
715
|
+
idxmap->index = read_index(f, io_flags);
|
|
575
716
|
idxmap->own_fields = true;
|
|
576
|
-
READVECTOR
|
|
717
|
+
READVECTOR(idxmap->id_map);
|
|
577
718
|
if (is_map2) {
|
|
578
|
-
static_cast<IndexIDMap2*>(idxmap)->construct_rev_map
|
|
719
|
+
static_cast<IndexIDMap2*>(idxmap)->construct_rev_map();
|
|
579
720
|
}
|
|
580
721
|
idx = idxmap;
|
|
581
|
-
} else if (h == fourcc
|
|
582
|
-
Index2Layer
|
|
583
|
-
read_index_header
|
|
584
|
-
idxp->q1.quantizer = read_index
|
|
585
|
-
READ1
|
|
586
|
-
READ1
|
|
587
|
-
read_ProductQuantizer
|
|
588
|
-
READ1
|
|
589
|
-
READ1
|
|
590
|
-
READ1
|
|
591
|
-
READVECTOR
|
|
722
|
+
} else if (h == fourcc("Ix2L")) {
|
|
723
|
+
Index2Layer* idxp = new Index2Layer();
|
|
724
|
+
read_index_header(idxp, f);
|
|
725
|
+
idxp->q1.quantizer = read_index(f, io_flags);
|
|
726
|
+
READ1(idxp->q1.nlist);
|
|
727
|
+
READ1(idxp->q1.quantizer_trains_alone);
|
|
728
|
+
read_ProductQuantizer(&idxp->pq, f);
|
|
729
|
+
READ1(idxp->code_size_1);
|
|
730
|
+
READ1(idxp->code_size_2);
|
|
731
|
+
READ1(idxp->code_size);
|
|
732
|
+
READVECTOR(idxp->codes);
|
|
592
733
|
idx = idxp;
|
|
593
|
-
} else if
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
if (h == fourcc("
|
|
598
|
-
|
|
599
|
-
if (h == fourcc("
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
734
|
+
} else if (
|
|
735
|
+
h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
|
|
736
|
+
h == fourcc("IHN2")) {
|
|
737
|
+
IndexHNSW* idxhnsw = nullptr;
|
|
738
|
+
if (h == fourcc("IHNf"))
|
|
739
|
+
idxhnsw = new IndexHNSWFlat();
|
|
740
|
+
if (h == fourcc("IHNp"))
|
|
741
|
+
idxhnsw = new IndexHNSWPQ();
|
|
742
|
+
if (h == fourcc("IHNs"))
|
|
743
|
+
idxhnsw = new IndexHNSWSQ();
|
|
744
|
+
if (h == fourcc("IHN2"))
|
|
745
|
+
idxhnsw = new IndexHNSW2Level();
|
|
746
|
+
read_index_header(idxhnsw, f);
|
|
747
|
+
read_HNSW(&idxhnsw->hnsw, f);
|
|
748
|
+
idxhnsw->storage = read_index(f, io_flags);
|
|
603
749
|
idxhnsw->own_fields = true;
|
|
604
750
|
if (h == fourcc("IHNp")) {
|
|
605
|
-
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table
|
|
751
|
+
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
|
|
606
752
|
}
|
|
607
753
|
idx = idxhnsw;
|
|
608
|
-
} else if(h == fourcc("
|
|
609
|
-
|
|
610
|
-
read_index_header
|
|
611
|
-
|
|
612
|
-
READ1
|
|
613
|
-
READ1
|
|
614
|
-
READ1
|
|
615
|
-
READ1
|
|
616
|
-
READ1
|
|
617
|
-
|
|
754
|
+
} else if (h == fourcc("INSf")) {
|
|
755
|
+
IndexNSG* idxnsg = new IndexNSGFlat();
|
|
756
|
+
read_index_header(idxnsg, f);
|
|
757
|
+
READ1(idxnsg->GK);
|
|
758
|
+
READ1(idxnsg->build_type);
|
|
759
|
+
READ1(idxnsg->nndescent_S);
|
|
760
|
+
READ1(idxnsg->nndescent_R);
|
|
761
|
+
READ1(idxnsg->nndescent_L);
|
|
762
|
+
READ1(idxnsg->nndescent_iter);
|
|
763
|
+
read_NSG(&idxnsg->nsg, f);
|
|
764
|
+
idxnsg->storage = read_index(f, io_flags);
|
|
765
|
+
idxnsg->own_fields = true;
|
|
766
|
+
idx = idxnsg;
|
|
767
|
+
} else if (h == fourcc("IPfs")) {
|
|
768
|
+
IndexPQFastScan* idxpqfs = new IndexPQFastScan();
|
|
769
|
+
read_index_header(idxpqfs, f);
|
|
770
|
+
read_ProductQuantizer(&idxpqfs->pq, f);
|
|
771
|
+
READ1(idxpqfs->implem);
|
|
772
|
+
READ1(idxpqfs->bbs);
|
|
773
|
+
READ1(idxpqfs->qbs);
|
|
774
|
+
READ1(idxpqfs->ntotal2);
|
|
775
|
+
READ1(idxpqfs->M2);
|
|
776
|
+
READVECTOR(idxpqfs->codes);
|
|
618
777
|
idx = idxpqfs;
|
|
619
778
|
|
|
620
779
|
} else if (h == fourcc("IwPf")) {
|
|
621
|
-
IndexIVFPQFastScan
|
|
622
|
-
read_ivf_header
|
|
623
|
-
READ1
|
|
624
|
-
READ1
|
|
625
|
-
READ1
|
|
626
|
-
READ1
|
|
627
|
-
READ1
|
|
628
|
-
READ1
|
|
629
|
-
read_ProductQuantizer
|
|
630
|
-
read_InvertedLists
|
|
780
|
+
IndexIVFPQFastScan* ivpq = new IndexIVFPQFastScan();
|
|
781
|
+
read_ivf_header(ivpq, f);
|
|
782
|
+
READ1(ivpq->by_residual);
|
|
783
|
+
READ1(ivpq->code_size);
|
|
784
|
+
READ1(ivpq->bbs);
|
|
785
|
+
READ1(ivpq->M2);
|
|
786
|
+
READ1(ivpq->implem);
|
|
787
|
+
READ1(ivpq->qbs2);
|
|
788
|
+
read_ProductQuantizer(&ivpq->pq, f);
|
|
789
|
+
read_InvertedLists(ivpq, f, io_flags);
|
|
631
790
|
ivpq->precompute_table();
|
|
632
791
|
idx = ivpq;
|
|
633
792
|
} else {
|
|
634
793
|
FAISS_THROW_FMT(
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
794
|
+
"Index type 0x%08x (\"%s\") not recognized",
|
|
795
|
+
h,
|
|
796
|
+
fourcc_inv_printable(h).c_str());
|
|
638
797
|
idx = nullptr;
|
|
639
798
|
}
|
|
640
799
|
return idx;
|
|
641
800
|
}
|
|
642
801
|
|
|
643
|
-
|
|
644
|
-
Index *read_index (FILE * f, int io_flags) {
|
|
802
|
+
Index* read_index(FILE* f, int io_flags) {
|
|
645
803
|
FileIOReader reader(f);
|
|
646
804
|
return read_index(&reader, io_flags);
|
|
647
805
|
}
|
|
648
806
|
|
|
649
|
-
Index
|
|
807
|
+
Index* read_index(const char* fname, int io_flags) {
|
|
650
808
|
FileIOReader reader(fname);
|
|
651
|
-
Index
|
|
809
|
+
Index* idx = read_index(&reader, io_flags);
|
|
652
810
|
return idx;
|
|
653
811
|
}
|
|
654
812
|
|
|
655
|
-
VectorTransform
|
|
813
|
+
VectorTransform* read_VectorTransform(const char* fname) {
|
|
656
814
|
FileIOReader reader(fname);
|
|
657
|
-
VectorTransform
|
|
815
|
+
VectorTransform* vt = read_VectorTransform(&reader);
|
|
658
816
|
return vt;
|
|
659
817
|
}
|
|
660
818
|
|
|
661
|
-
|
|
662
|
-
|
|
663
819
|
/*************************************************************
|
|
664
820
|
* Read binary indexes
|
|
665
821
|
**************************************************************/
|
|
666
822
|
|
|
667
|
-
static void read_InvertedLists
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
823
|
+
static void read_InvertedLists(IndexBinaryIVF* ivf, IOReader* f, int io_flags) {
|
|
824
|
+
InvertedLists* ils = read_InvertedLists(f, io_flags);
|
|
825
|
+
FAISS_THROW_IF_NOT(
|
|
826
|
+
!ils ||
|
|
827
|
+
(ils->nlist == ivf->nlist && ils->code_size == ivf->code_size));
|
|
672
828
|
ivf->invlists = ils;
|
|
673
829
|
ivf->own_invlists = true;
|
|
674
830
|
}
|
|
675
831
|
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
READ1
|
|
680
|
-
READ1
|
|
681
|
-
READ1
|
|
682
|
-
READ1 (idx->is_trained);
|
|
683
|
-
READ1 (idx->metric_type);
|
|
832
|
+
static void read_index_binary_header(IndexBinary* idx, IOReader* f) {
|
|
833
|
+
READ1(idx->d);
|
|
834
|
+
READ1(idx->code_size);
|
|
835
|
+
READ1(idx->ntotal);
|
|
836
|
+
READ1(idx->is_trained);
|
|
837
|
+
READ1(idx->metric_type);
|
|
684
838
|
idx->verbose = false;
|
|
685
839
|
}
|
|
686
840
|
|
|
687
|
-
static void read_binary_ivf_header
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
{
|
|
691
|
-
read_index_binary_header
|
|
692
|
-
READ1
|
|
693
|
-
READ1
|
|
694
|
-
ivf->quantizer = read_index_binary
|
|
841
|
+
static void read_binary_ivf_header(
|
|
842
|
+
IndexBinaryIVF* ivf,
|
|
843
|
+
IOReader* f,
|
|
844
|
+
std::vector<std::vector<Index::idx_t>>* ids = nullptr) {
|
|
845
|
+
read_index_binary_header(ivf, f);
|
|
846
|
+
READ1(ivf->nlist);
|
|
847
|
+
READ1(ivf->nprobe);
|
|
848
|
+
ivf->quantizer = read_index_binary(f);
|
|
695
849
|
ivf->own_fields = true;
|
|
696
850
|
if (ids) { // used in legacy "Iv" formats
|
|
697
|
-
ids->resize
|
|
851
|
+
ids->resize(ivf->nlist);
|
|
698
852
|
for (size_t i = 0; i < ivf->nlist; i++)
|
|
699
|
-
READVECTOR
|
|
853
|
+
READVECTOR((*ids)[i]);
|
|
700
854
|
}
|
|
701
|
-
read_direct_map
|
|
855
|
+
read_direct_map(&ivf->direct_map, f);
|
|
702
856
|
}
|
|
703
857
|
|
|
704
|
-
static void read_binary_hash_invlists
|
|
705
|
-
IndexBinaryHash::InvertedListMap
|
|
706
|
-
int b,
|
|
707
|
-
{
|
|
858
|
+
static void read_binary_hash_invlists(
|
|
859
|
+
IndexBinaryHash::InvertedListMap& invlists,
|
|
860
|
+
int b,
|
|
861
|
+
IOReader* f) {
|
|
708
862
|
size_t sz;
|
|
709
|
-
READ1
|
|
863
|
+
READ1(sz);
|
|
710
864
|
int il_nbit = 0;
|
|
711
|
-
READ1
|
|
865
|
+
READ1(il_nbit);
|
|
712
866
|
// buffer for bitstrings
|
|
713
867
|
std::vector<uint8_t> buf((b + il_nbit) * sz);
|
|
714
|
-
READVECTOR
|
|
715
|
-
BitstringReader rd
|
|
716
|
-
invlists.reserve
|
|
868
|
+
READVECTOR(buf);
|
|
869
|
+
BitstringReader rd(buf.data(), buf.size());
|
|
870
|
+
invlists.reserve(sz);
|
|
717
871
|
for (size_t i = 0; i < sz; i++) {
|
|
718
872
|
uint64_t hash = rd.read(b);
|
|
719
873
|
uint64_t ilsz = rd.read(il_nbit);
|
|
720
|
-
auto
|
|
721
|
-
READVECTOR
|
|
722
|
-
FAISS_THROW_IF_NOT
|
|
723
|
-
READVECTOR
|
|
874
|
+
auto& il = invlists[hash];
|
|
875
|
+
READVECTOR(il.ids);
|
|
876
|
+
FAISS_THROW_IF_NOT(il.ids.size() == ilsz);
|
|
877
|
+
READVECTOR(il.vecs);
|
|
724
878
|
}
|
|
725
879
|
}
|
|
726
880
|
|
|
727
881
|
static void read_binary_multi_hash_map(
|
|
728
|
-
IndexBinaryMultiHash::Map
|
|
729
|
-
int b,
|
|
730
|
-
|
|
731
|
-
{
|
|
882
|
+
IndexBinaryMultiHash::Map& map,
|
|
883
|
+
int b,
|
|
884
|
+
size_t ntotal,
|
|
885
|
+
IOReader* f) {
|
|
732
886
|
int id_bits;
|
|
733
887
|
size_t sz;
|
|
734
|
-
READ1
|
|
735
|
-
READ1
|
|
888
|
+
READ1(id_bits);
|
|
889
|
+
READ1(sz);
|
|
736
890
|
std::vector<uint8_t> buf;
|
|
737
|
-
READVECTOR
|
|
891
|
+
READVECTOR(buf);
|
|
738
892
|
size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
|
|
739
|
-
FAISS_THROW_IF_NOT
|
|
740
|
-
BitstringReader rd
|
|
741
|
-
map.reserve
|
|
893
|
+
FAISS_THROW_IF_NOT(buf.size() == (nbit + 7) / 8);
|
|
894
|
+
BitstringReader rd(buf.data(), buf.size());
|
|
895
|
+
map.reserve(sz);
|
|
742
896
|
for (size_t i = 0; i < sz; i++) {
|
|
743
897
|
uint64_t hash = rd.read(b);
|
|
744
898
|
uint64_t ilsz = rd.read(id_bits);
|
|
745
|
-
auto
|
|
899
|
+
auto& il = map[hash];
|
|
746
900
|
for (size_t j = 0; j < ilsz; j++) {
|
|
747
|
-
il.push_back
|
|
901
|
+
il.push_back(rd.read(id_bits));
|
|
748
902
|
}
|
|
749
903
|
}
|
|
750
904
|
}
|
|
751
905
|
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
755
|
-
IndexBinary * idx = nullptr;
|
|
906
|
+
IndexBinary* read_index_binary(IOReader* f, int io_flags) {
|
|
907
|
+
IndexBinary* idx = nullptr;
|
|
756
908
|
uint32_t h;
|
|
757
|
-
READ1
|
|
758
|
-
if (h == fourcc
|
|
759
|
-
IndexBinaryFlat
|
|
760
|
-
read_index_binary_header
|
|
761
|
-
READVECTOR
|
|
762
|
-
FAISS_THROW_IF_NOT
|
|
909
|
+
READ1(h);
|
|
910
|
+
if (h == fourcc("IBxF")) {
|
|
911
|
+
IndexBinaryFlat* idxf = new IndexBinaryFlat();
|
|
912
|
+
read_index_binary_header(idxf, f);
|
|
913
|
+
READVECTOR(idxf->xb);
|
|
914
|
+
FAISS_THROW_IF_NOT(idxf->xb.size() == idxf->ntotal * idxf->code_size);
|
|
763
915
|
// leak!
|
|
764
916
|
idx = idxf;
|
|
765
|
-
} else if (h == fourcc
|
|
766
|
-
IndexBinaryIVF
|
|
767
|
-
read_binary_ivf_header
|
|
768
|
-
read_InvertedLists
|
|
917
|
+
} else if (h == fourcc("IBwF")) {
|
|
918
|
+
IndexBinaryIVF* ivf = new IndexBinaryIVF();
|
|
919
|
+
read_binary_ivf_header(ivf, f);
|
|
920
|
+
read_InvertedLists(ivf, f, io_flags);
|
|
769
921
|
idx = ivf;
|
|
770
|
-
} else if (h == fourcc
|
|
771
|
-
IndexBinaryFromFloat
|
|
772
|
-
read_index_binary_header
|
|
922
|
+
} else if (h == fourcc("IBFf")) {
|
|
923
|
+
IndexBinaryFromFloat* idxff = new IndexBinaryFromFloat();
|
|
924
|
+
read_index_binary_header(idxff, f);
|
|
773
925
|
idxff->own_fields = true;
|
|
774
|
-
idxff->index = read_index
|
|
926
|
+
idxff->index = read_index(f, io_flags);
|
|
775
927
|
idx = idxff;
|
|
776
|
-
} else if (h == fourcc
|
|
777
|
-
IndexBinaryHNSW
|
|
778
|
-
read_index_binary_header
|
|
779
|
-
read_HNSW
|
|
780
|
-
idxhnsw->storage = read_index_binary
|
|
928
|
+
} else if (h == fourcc("IBHf")) {
|
|
929
|
+
IndexBinaryHNSW* idxhnsw = new IndexBinaryHNSW();
|
|
930
|
+
read_index_binary_header(idxhnsw, f);
|
|
931
|
+
read_HNSW(&idxhnsw->hnsw, f);
|
|
932
|
+
idxhnsw->storage = read_index_binary(f, io_flags);
|
|
781
933
|
idxhnsw->own_fields = true;
|
|
782
934
|
idx = idxhnsw;
|
|
783
|
-
} else if(h == fourcc
|
|
784
|
-
bool is_map2 = h == fourcc
|
|
785
|
-
IndexBinaryIDMap
|
|
786
|
-
|
|
787
|
-
read_index_binary_header
|
|
788
|
-
idxmap->index = read_index_binary
|
|
935
|
+
} else if (h == fourcc("IBMp") || h == fourcc("IBM2")) {
|
|
936
|
+
bool is_map2 = h == fourcc("IBM2");
|
|
937
|
+
IndexBinaryIDMap* idxmap =
|
|
938
|
+
is_map2 ? new IndexBinaryIDMap2() : new IndexBinaryIDMap();
|
|
939
|
+
read_index_binary_header(idxmap, f);
|
|
940
|
+
idxmap->index = read_index_binary(f, io_flags);
|
|
789
941
|
idxmap->own_fields = true;
|
|
790
|
-
READVECTOR
|
|
942
|
+
READVECTOR(idxmap->id_map);
|
|
791
943
|
if (is_map2) {
|
|
792
|
-
static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map
|
|
944
|
+
static_cast<IndexBinaryIDMap2*>(idxmap)->construct_rev_map();
|
|
793
945
|
}
|
|
794
946
|
idx = idxmap;
|
|
795
|
-
} else if(h == fourcc("IBHh")) {
|
|
796
|
-
IndexBinaryHash
|
|
797
|
-
read_index_binary_header
|
|
798
|
-
READ1
|
|
799
|
-
READ1
|
|
947
|
+
} else if (h == fourcc("IBHh")) {
|
|
948
|
+
IndexBinaryHash* idxh = new IndexBinaryHash();
|
|
949
|
+
read_index_binary_header(idxh, f);
|
|
950
|
+
READ1(idxh->b);
|
|
951
|
+
READ1(idxh->nflip);
|
|
800
952
|
read_binary_hash_invlists(idxh->invlists, idxh->b, f);
|
|
801
953
|
idx = idxh;
|
|
802
|
-
} else if(h == fourcc("IBHm")) {
|
|
803
|
-
IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash
|
|
804
|
-
read_index_binary_header
|
|
805
|
-
idxmh->storage = dynamic_cast<IndexBinaryFlat*>
|
|
806
|
-
FAISS_THROW_IF_NOT(
|
|
954
|
+
} else if (h == fourcc("IBHm")) {
|
|
955
|
+
IndexBinaryMultiHash* idxmh = new IndexBinaryMultiHash();
|
|
956
|
+
read_index_binary_header(idxmh, f);
|
|
957
|
+
idxmh->storage = dynamic_cast<IndexBinaryFlat*>(read_index_binary(f));
|
|
958
|
+
FAISS_THROW_IF_NOT(
|
|
959
|
+
idxmh->storage && idxmh->storage->ntotal == idxmh->ntotal);
|
|
807
960
|
idxmh->own_fields = true;
|
|
808
|
-
READ1
|
|
809
|
-
READ1
|
|
810
|
-
READ1
|
|
811
|
-
idxmh->maps.resize
|
|
961
|
+
READ1(idxmh->b);
|
|
962
|
+
READ1(idxmh->nhash);
|
|
963
|
+
READ1(idxmh->nflip);
|
|
964
|
+
idxmh->maps.resize(idxmh->nhash);
|
|
812
965
|
for (int i = 0; i < idxmh->nhash; i++) {
|
|
813
966
|
read_binary_multi_hash_map(
|
|
814
967
|
idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
|
|
@@ -816,25 +969,23 @@ IndexBinary *read_index_binary (IOReader *f, int io_flags) {
|
|
|
816
969
|
idx = idxmh;
|
|
817
970
|
} else {
|
|
818
971
|
FAISS_THROW_FMT(
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
972
|
+
"Index type %08x (\"%s\") not recognized",
|
|
973
|
+
h,
|
|
974
|
+
fourcc_inv_printable(h).c_str());
|
|
822
975
|
idx = nullptr;
|
|
823
976
|
}
|
|
824
977
|
return idx;
|
|
825
978
|
}
|
|
826
979
|
|
|
827
|
-
IndexBinary
|
|
980
|
+
IndexBinary* read_index_binary(FILE* f, int io_flags) {
|
|
828
981
|
FileIOReader reader(f);
|
|
829
982
|
return read_index_binary(&reader, io_flags);
|
|
830
983
|
}
|
|
831
984
|
|
|
832
|
-
IndexBinary
|
|
985
|
+
IndexBinary* read_index_binary(const char* fname, int io_flags) {
|
|
833
986
|
FileIOReader reader(fname);
|
|
834
|
-
IndexBinary
|
|
987
|
+
IndexBinary* idx = read_index_binary(&reader, io_flags);
|
|
835
988
|
return idx;
|
|
836
989
|
}
|
|
837
990
|
|
|
838
|
-
|
|
839
|
-
|
|
840
991
|
} // namespace faiss
|