faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -9,11 +9,14 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/index_io.h>
|
|
11
11
|
|
|
12
|
+
#include <faiss/impl/io.h>
|
|
13
|
+
#include <faiss/impl/io_macros.h>
|
|
14
|
+
|
|
12
15
|
#include <cstdio>
|
|
13
16
|
#include <cstdlib>
|
|
14
17
|
|
|
15
|
-
#include <sys/types.h>
|
|
16
18
|
#include <sys/stat.h>
|
|
19
|
+
#include <sys/types.h>
|
|
17
20
|
|
|
18
21
|
#include <faiss/invlists/InvertedListsIOHook.h>
|
|
19
22
|
|
|
@@ -22,30 +25,32 @@
|
|
|
22
25
|
#include <faiss/impl/io_macros.h>
|
|
23
26
|
#include <faiss/utils/hamming.h>
|
|
24
27
|
|
|
28
|
+
#include <faiss/Index2Layer.h>
|
|
25
29
|
#include <faiss/IndexFlat.h>
|
|
26
|
-
#include <faiss/
|
|
27
|
-
#include <faiss/IndexPreTransform.h>
|
|
28
|
-
#include <faiss/IndexLSH.h>
|
|
29
|
-
#include <faiss/IndexPQ.h>
|
|
30
|
+
#include <faiss/IndexHNSW.h>
|
|
30
31
|
#include <faiss/IndexIVF.h>
|
|
32
|
+
#include <faiss/IndexIVFFlat.h>
|
|
31
33
|
#include <faiss/IndexIVFPQ.h>
|
|
34
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
|
32
35
|
#include <faiss/IndexIVFPQR.h>
|
|
33
|
-
#include <faiss/Index2Layer.h>
|
|
34
|
-
#include <faiss/IndexIVFFlat.h>
|
|
35
36
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
36
|
-
#include <faiss/
|
|
37
|
-
#include <faiss/IndexScalarQuantizer.h>
|
|
38
|
-
#include <faiss/IndexHNSW.h>
|
|
37
|
+
#include <faiss/IndexLSH.h>
|
|
39
38
|
#include <faiss/IndexLattice.h>
|
|
39
|
+
#include <faiss/IndexNSG.h>
|
|
40
|
+
#include <faiss/IndexPQ.h>
|
|
40
41
|
#include <faiss/IndexPQFastScan.h>
|
|
41
|
-
#include <faiss/
|
|
42
|
+
#include <faiss/IndexPreTransform.h>
|
|
42
43
|
#include <faiss/IndexRefine.h>
|
|
44
|
+
#include <faiss/IndexResidual.h>
|
|
45
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
46
|
+
#include <faiss/MetaIndexes.h>
|
|
47
|
+
#include <faiss/VectorTransform.h>
|
|
43
48
|
|
|
44
49
|
#include <faiss/IndexBinaryFlat.h>
|
|
45
50
|
#include <faiss/IndexBinaryFromFloat.h>
|
|
46
51
|
#include <faiss/IndexBinaryHNSW.h>
|
|
47
|
-
#include <faiss/IndexBinaryIVF.h>
|
|
48
52
|
#include <faiss/IndexBinaryHash.h>
|
|
53
|
+
#include <faiss/IndexBinaryIVF.h>
|
|
49
54
|
|
|
50
55
|
/*************************************************************
|
|
51
56
|
* The I/O format is the content of the class. For objects that are
|
|
@@ -68,112 +73,120 @@
|
|
|
68
73
|
|
|
69
74
|
namespace faiss {
|
|
70
75
|
|
|
71
|
-
|
|
72
76
|
/*************************************************************
|
|
73
77
|
* Write
|
|
74
78
|
**************************************************************/
|
|
75
|
-
static void write_index_header
|
|
76
|
-
WRITE1
|
|
77
|
-
WRITE1
|
|
79
|
+
static void write_index_header(const Index* idx, IOWriter* f) {
|
|
80
|
+
WRITE1(idx->d);
|
|
81
|
+
WRITE1(idx->ntotal);
|
|
78
82
|
Index::idx_t dummy = 1 << 20;
|
|
79
|
-
WRITE1
|
|
80
|
-
WRITE1
|
|
81
|
-
WRITE1
|
|
82
|
-
WRITE1
|
|
83
|
+
WRITE1(dummy);
|
|
84
|
+
WRITE1(dummy);
|
|
85
|
+
WRITE1(idx->is_trained);
|
|
86
|
+
WRITE1(idx->metric_type);
|
|
83
87
|
if (idx->metric_type > 1) {
|
|
84
|
-
WRITE1
|
|
88
|
+
WRITE1(idx->metric_arg);
|
|
85
89
|
}
|
|
86
90
|
}
|
|
87
91
|
|
|
88
|
-
void write_VectorTransform
|
|
89
|
-
if (const LinearTransform
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
WRITE1
|
|
98
|
-
WRITE1
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
WRITEVECTOR
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
WRITE1 (h);
|
|
108
|
-
WRITE1 (itqm->max_iter);
|
|
109
|
-
WRITE1 (itqm->seed);
|
|
92
|
+
void write_VectorTransform(const VectorTransform* vt, IOWriter* f) {
|
|
93
|
+
if (const LinearTransform* lt = dynamic_cast<const LinearTransform*>(vt)) {
|
|
94
|
+
if (dynamic_cast<const RandomRotationMatrix*>(lt)) {
|
|
95
|
+
uint32_t h = fourcc("rrot");
|
|
96
|
+
WRITE1(h);
|
|
97
|
+
} else if (const PCAMatrix* pca = dynamic_cast<const PCAMatrix*>(lt)) {
|
|
98
|
+
uint32_t h = fourcc("PcAm");
|
|
99
|
+
WRITE1(h);
|
|
100
|
+
WRITE1(pca->eigen_power);
|
|
101
|
+
WRITE1(pca->random_rotation);
|
|
102
|
+
WRITE1(pca->balanced_bins);
|
|
103
|
+
WRITEVECTOR(pca->mean);
|
|
104
|
+
WRITEVECTOR(pca->eigenvalues);
|
|
105
|
+
WRITEVECTOR(pca->PCAMat);
|
|
106
|
+
} else if (const ITQMatrix* itqm = dynamic_cast<const ITQMatrix*>(lt)) {
|
|
107
|
+
uint32_t h = fourcc("Viqm");
|
|
108
|
+
WRITE1(h);
|
|
109
|
+
WRITE1(itqm->max_iter);
|
|
110
|
+
WRITE1(itqm->seed);
|
|
110
111
|
} else {
|
|
111
112
|
// generic LinearTransform (includes OPQ)
|
|
112
|
-
uint32_t h = fourcc
|
|
113
|
-
WRITE1
|
|
113
|
+
uint32_t h = fourcc("LTra");
|
|
114
|
+
WRITE1(h);
|
|
114
115
|
}
|
|
115
|
-
WRITE1
|
|
116
|
-
WRITEVECTOR
|
|
117
|
-
WRITEVECTOR
|
|
118
|
-
} else if (
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
116
|
+
WRITE1(lt->have_bias);
|
|
117
|
+
WRITEVECTOR(lt->A);
|
|
118
|
+
WRITEVECTOR(lt->b);
|
|
119
|
+
} else if (
|
|
120
|
+
const RemapDimensionsTransform* rdt =
|
|
121
|
+
dynamic_cast<const RemapDimensionsTransform*>(vt)) {
|
|
122
|
+
uint32_t h = fourcc("RmDT");
|
|
123
|
+
WRITE1(h);
|
|
124
|
+
WRITEVECTOR(rdt->map);
|
|
125
|
+
} else if (
|
|
126
|
+
const NormalizationTransform* nt =
|
|
127
|
+
dynamic_cast<const NormalizationTransform*>(vt)) {
|
|
128
|
+
uint32_t h = fourcc("VNrm");
|
|
129
|
+
WRITE1(h);
|
|
130
|
+
WRITE1(nt->norm);
|
|
131
|
+
} else if (
|
|
132
|
+
const CenteringTransform* ct =
|
|
133
|
+
dynamic_cast<const CenteringTransform*>(vt)) {
|
|
134
|
+
uint32_t h = fourcc("VCnt");
|
|
135
|
+
WRITE1(h);
|
|
136
|
+
WRITEVECTOR(ct->mean);
|
|
137
|
+
} else if (
|
|
138
|
+
const ITQTransform* itqt = dynamic_cast<const ITQTransform*>(vt)) {
|
|
139
|
+
uint32_t h = fourcc("Viqt");
|
|
140
|
+
WRITE1(h);
|
|
141
|
+
WRITEVECTOR(itqt->mean);
|
|
142
|
+
WRITE1(itqt->do_pca);
|
|
143
|
+
write_VectorTransform(&itqt->itq, f);
|
|
144
|
+
write_VectorTransform(&itqt->pca_then_itq, f);
|
|
141
145
|
} else {
|
|
142
|
-
FAISS_THROW_MSG
|
|
146
|
+
FAISS_THROW_MSG("cannot serialize this");
|
|
143
147
|
}
|
|
144
148
|
// common fields
|
|
145
|
-
WRITE1
|
|
146
|
-
WRITE1
|
|
147
|
-
WRITE1
|
|
149
|
+
WRITE1(vt->d_in);
|
|
150
|
+
WRITE1(vt->d_out);
|
|
151
|
+
WRITE1(vt->is_trained);
|
|
148
152
|
}
|
|
149
153
|
|
|
150
|
-
void write_ProductQuantizer
|
|
151
|
-
WRITE1
|
|
152
|
-
WRITE1
|
|
153
|
-
WRITE1
|
|
154
|
-
WRITEVECTOR
|
|
154
|
+
void write_ProductQuantizer(const ProductQuantizer* pq, IOWriter* f) {
|
|
155
|
+
WRITE1(pq->d);
|
|
156
|
+
WRITE1(pq->M);
|
|
157
|
+
WRITE1(pq->nbits);
|
|
158
|
+
WRITEVECTOR(pq->centroids);
|
|
155
159
|
}
|
|
156
160
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
WRITE1
|
|
160
|
-
|
|
161
|
-
WRITE1
|
|
162
|
-
WRITE1
|
|
163
|
-
WRITE1
|
|
164
|
-
WRITEVECTOR
|
|
161
|
+
void write_ResidualQuantizer(const ResidualQuantizer* rq, IOWriter* f) {
|
|
162
|
+
WRITE1(rq->d);
|
|
163
|
+
WRITE1(rq->M);
|
|
164
|
+
WRITEVECTOR(rq->nbits);
|
|
165
|
+
WRITE1(rq->is_trained);
|
|
166
|
+
WRITE1(rq->train_type);
|
|
167
|
+
WRITE1(rq->max_beam_size);
|
|
168
|
+
WRITEVECTOR(rq->codebooks);
|
|
165
169
|
}
|
|
166
170
|
|
|
167
|
-
void
|
|
171
|
+
static void write_ScalarQuantizer(const ScalarQuantizer* ivsc, IOWriter* f) {
|
|
172
|
+
WRITE1(ivsc->qtype);
|
|
173
|
+
WRITE1(ivsc->rangestat);
|
|
174
|
+
WRITE1(ivsc->rangestat_arg);
|
|
175
|
+
WRITE1(ivsc->d);
|
|
176
|
+
WRITE1(ivsc->code_size);
|
|
177
|
+
WRITEVECTOR(ivsc->trained);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
void write_InvertedLists(const InvertedLists* ils, IOWriter* f) {
|
|
168
181
|
if (ils == nullptr) {
|
|
169
|
-
uint32_t h = fourcc
|
|
170
|
-
WRITE1
|
|
171
|
-
} else if (
|
|
172
|
-
|
|
173
|
-
uint32_t h = fourcc
|
|
174
|
-
WRITE1
|
|
175
|
-
WRITE1
|
|
176
|
-
WRITE1
|
|
182
|
+
uint32_t h = fourcc("il00");
|
|
183
|
+
WRITE1(h);
|
|
184
|
+
} else if (
|
|
185
|
+
const auto& ails = dynamic_cast<const ArrayInvertedLists*>(ils)) {
|
|
186
|
+
uint32_t h = fourcc("ilar");
|
|
187
|
+
WRITE1(h);
|
|
188
|
+
WRITE1(ails->nlist);
|
|
189
|
+
WRITE1(ails->code_size);
|
|
177
190
|
// here we store either as a full or a sparse data buffer
|
|
178
191
|
size_t n_non0 = 0;
|
|
179
192
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
@@ -182,329 +195,401 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
|
|
|
182
195
|
}
|
|
183
196
|
if (n_non0 > ails->nlist / 2) {
|
|
184
197
|
uint32_t list_type = fourcc("full");
|
|
185
|
-
WRITE1
|
|
198
|
+
WRITE1(list_type);
|
|
186
199
|
std::vector<size_t> sizes;
|
|
187
200
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
188
|
-
sizes.push_back
|
|
201
|
+
sizes.push_back(ails->ids[i].size());
|
|
189
202
|
}
|
|
190
|
-
WRITEVECTOR
|
|
203
|
+
WRITEVECTOR(sizes);
|
|
191
204
|
} else {
|
|
192
205
|
int list_type = fourcc("sprs"); // sparse
|
|
193
|
-
WRITE1
|
|
206
|
+
WRITE1(list_type);
|
|
194
207
|
std::vector<size_t> sizes;
|
|
195
208
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
196
209
|
size_t n = ails->ids[i].size();
|
|
197
210
|
if (n > 0) {
|
|
198
|
-
sizes.push_back
|
|
199
|
-
sizes.push_back
|
|
211
|
+
sizes.push_back(i);
|
|
212
|
+
sizes.push_back(n);
|
|
200
213
|
}
|
|
201
214
|
}
|
|
202
|
-
WRITEVECTOR
|
|
215
|
+
WRITEVECTOR(sizes);
|
|
203
216
|
}
|
|
204
217
|
// make a single contiguous data buffer (useful for mmapping)
|
|
205
218
|
for (size_t i = 0; i < ails->nlist; i++) {
|
|
206
219
|
size_t n = ails->ids[i].size();
|
|
207
220
|
if (n > 0) {
|
|
208
|
-
WRITEANDCHECK
|
|
209
|
-
WRITEANDCHECK
|
|
221
|
+
WRITEANDCHECK(ails->codes[i].data(), n * ails->code_size);
|
|
222
|
+
WRITEANDCHECK(ails->ids[i].data(), n);
|
|
210
223
|
}
|
|
211
224
|
}
|
|
212
225
|
|
|
213
226
|
} else {
|
|
214
|
-
InvertedListsIOHook::lookup_classname(
|
|
215
|
-
|
|
227
|
+
InvertedListsIOHook::lookup_classname(typeid(*ils).name())
|
|
228
|
+
->write(ils, f);
|
|
216
229
|
}
|
|
217
230
|
}
|
|
218
231
|
|
|
219
|
-
|
|
220
|
-
void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
|
|
232
|
+
void write_ProductQuantizer(const ProductQuantizer* pq, const char* fname) {
|
|
221
233
|
FileIOWriter writer(fname);
|
|
222
|
-
write_ProductQuantizer
|
|
234
|
+
write_ProductQuantizer(pq, &writer);
|
|
223
235
|
}
|
|
224
236
|
|
|
225
|
-
static void write_HNSW
|
|
237
|
+
static void write_HNSW(const HNSW* hnsw, IOWriter* f) {
|
|
238
|
+
WRITEVECTOR(hnsw->assign_probas);
|
|
239
|
+
WRITEVECTOR(hnsw->cum_nneighbor_per_level);
|
|
240
|
+
WRITEVECTOR(hnsw->levels);
|
|
241
|
+
WRITEVECTOR(hnsw->offsets);
|
|
242
|
+
WRITEVECTOR(hnsw->neighbors);
|
|
243
|
+
|
|
244
|
+
WRITE1(hnsw->entry_point);
|
|
245
|
+
WRITE1(hnsw->max_level);
|
|
246
|
+
WRITE1(hnsw->efConstruction);
|
|
247
|
+
WRITE1(hnsw->efSearch);
|
|
248
|
+
WRITE1(hnsw->upper_beam);
|
|
249
|
+
}
|
|
226
250
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
251
|
+
static void write_NSG(const NSG* nsg, IOWriter* f) {
|
|
252
|
+
WRITE1(nsg->ntotal);
|
|
253
|
+
WRITE1(nsg->R);
|
|
254
|
+
WRITE1(nsg->L);
|
|
255
|
+
WRITE1(nsg->C);
|
|
256
|
+
WRITE1(nsg->search_L);
|
|
257
|
+
WRITE1(nsg->enterpoint);
|
|
258
|
+
WRITE1(nsg->is_built);
|
|
259
|
+
|
|
260
|
+
if (!nsg->is_built) {
|
|
261
|
+
return;
|
|
262
|
+
}
|
|
232
263
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
264
|
+
constexpr int EMPTY_ID = -1;
|
|
265
|
+
auto& graph = nsg->final_graph;
|
|
266
|
+
int K = graph->K;
|
|
267
|
+
int N = graph->N;
|
|
268
|
+
FAISS_THROW_IF_NOT(N == nsg->ntotal);
|
|
269
|
+
FAISS_THROW_IF_NOT(K == nsg->R);
|
|
270
|
+
FAISS_THROW_IF_NOT(true == graph->own_fields);
|
|
271
|
+
|
|
272
|
+
int size = 0;
|
|
273
|
+
for (int i = 0; i < N; i++) {
|
|
274
|
+
for (int j = 0; j < K; j++) {
|
|
275
|
+
int id = graph->at(i, j);
|
|
276
|
+
if (id != EMPTY_ID) {
|
|
277
|
+
WRITE1(id);
|
|
278
|
+
size += 1;
|
|
279
|
+
} else {
|
|
280
|
+
break;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
WRITE1(EMPTY_ID);
|
|
284
|
+
}
|
|
238
285
|
}
|
|
239
286
|
|
|
240
|
-
static void write_direct_map
|
|
241
|
-
char maintain_direct_map =
|
|
242
|
-
|
|
243
|
-
|
|
287
|
+
static void write_direct_map(const DirectMap* dm, IOWriter* f) {
|
|
288
|
+
char maintain_direct_map =
|
|
289
|
+
(char)dm->type; // for backwards compatibility with bool
|
|
290
|
+
WRITE1(maintain_direct_map);
|
|
291
|
+
WRITEVECTOR(dm->array);
|
|
244
292
|
if (dm->type == DirectMap::Hashtable) {
|
|
245
293
|
using idx_t = Index::idx_t;
|
|
246
294
|
std::vector<std::pair<idx_t, idx_t>> v;
|
|
247
|
-
const std::unordered_map<idx_t, idx_t
|
|
248
|
-
v.resize
|
|
295
|
+
const std::unordered_map<idx_t, idx_t>& map = dm->hashtable;
|
|
296
|
+
v.resize(map.size());
|
|
249
297
|
std::copy(map.begin(), map.end(), v.begin());
|
|
250
|
-
WRITEVECTOR
|
|
298
|
+
WRITEVECTOR(v);
|
|
251
299
|
}
|
|
252
300
|
}
|
|
253
301
|
|
|
254
|
-
static void write_ivf_header
|
|
255
|
-
write_index_header
|
|
256
|
-
WRITE1
|
|
257
|
-
WRITE1
|
|
258
|
-
write_index
|
|
259
|
-
write_direct_map
|
|
302
|
+
static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) {
|
|
303
|
+
write_index_header(ivf, f);
|
|
304
|
+
WRITE1(ivf->nlist);
|
|
305
|
+
WRITE1(ivf->nprobe);
|
|
306
|
+
write_index(ivf->quantizer, f);
|
|
307
|
+
write_direct_map(&ivf->direct_map, f);
|
|
260
308
|
}
|
|
261
309
|
|
|
262
|
-
void write_index
|
|
263
|
-
if (const IndexFlat
|
|
264
|
-
uint32_t h =
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
WRITE1
|
|
276
|
-
WRITE1
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
310
|
+
void write_index(const Index* idx, IOWriter* f) {
|
|
311
|
+
if (const IndexFlat* idxf = dynamic_cast<const IndexFlat*>(idx)) {
|
|
312
|
+
uint32_t h =
|
|
313
|
+
fourcc(idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI"
|
|
314
|
+
: idxf->metric_type == METRIC_L2 ? "IxF2"
|
|
315
|
+
: "IxFl");
|
|
316
|
+
WRITE1(h);
|
|
317
|
+
write_index_header(idx, f);
|
|
318
|
+
WRITEVECTOR(idxf->xb);
|
|
319
|
+
} else if (const IndexLSH* idxl = dynamic_cast<const IndexLSH*>(idx)) {
|
|
320
|
+
uint32_t h = fourcc("IxHe");
|
|
321
|
+
WRITE1(h);
|
|
322
|
+
write_index_header(idx, f);
|
|
323
|
+
WRITE1(idxl->nbits);
|
|
324
|
+
WRITE1(idxl->rotate_data);
|
|
325
|
+
WRITE1(idxl->train_thresholds);
|
|
326
|
+
WRITEVECTOR(idxl->thresholds);
|
|
327
|
+
WRITE1(idxl->bytes_per_vec);
|
|
328
|
+
write_VectorTransform(&idxl->rrot, f);
|
|
329
|
+
WRITEVECTOR(idxl->codes);
|
|
330
|
+
} else if (const IndexPQ* idxp = dynamic_cast<const IndexPQ*>(idx)) {
|
|
331
|
+
uint32_t h = fourcc("IxPq");
|
|
332
|
+
WRITE1(h);
|
|
333
|
+
write_index_header(idx, f);
|
|
334
|
+
write_ProductQuantizer(&idxp->pq, f);
|
|
335
|
+
WRITEVECTOR(idxp->codes);
|
|
287
336
|
// search params -- maybe not useful to store?
|
|
288
|
-
WRITE1
|
|
289
|
-
WRITE1
|
|
290
|
-
WRITE1
|
|
291
|
-
} else if(
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
WRITE1
|
|
299
|
-
|
|
300
|
-
WRITE1
|
|
301
|
-
WRITE1
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
uint32_t h = fourcc
|
|
307
|
-
WRITE1
|
|
308
|
-
write_index_header
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
} else if(
|
|
312
|
-
|
|
313
|
-
uint32_t h = fourcc
|
|
314
|
-
WRITE1
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
WRITE1
|
|
318
|
-
WRITE1
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
337
|
+
WRITE1(idxp->search_type);
|
|
338
|
+
WRITE1(idxp->encode_signs);
|
|
339
|
+
WRITE1(idxp->polysemous_ht);
|
|
340
|
+
} else if (
|
|
341
|
+
const IndexResidual* idxr =
|
|
342
|
+
dynamic_cast<const IndexResidual*>(idx)) {
|
|
343
|
+
uint32_t h = fourcc("IxRQ");
|
|
344
|
+
WRITE1(h);
|
|
345
|
+
write_index_header(idx, f);
|
|
346
|
+
write_ResidualQuantizer(&idxr->rq, f);
|
|
347
|
+
WRITE1(idxr->search_type);
|
|
348
|
+
WRITE1(idxr->norm_min);
|
|
349
|
+
WRITE1(idxr->norm_max);
|
|
350
|
+
WRITE1(idxr->code_size);
|
|
351
|
+
WRITEVECTOR(idxr->codes);
|
|
352
|
+
} else if (
|
|
353
|
+
const ResidualCoarseQuantizer* idxr =
|
|
354
|
+
dynamic_cast<const ResidualCoarseQuantizer*>(idx)) {
|
|
355
|
+
uint32_t h = fourcc("ImRQ");
|
|
356
|
+
WRITE1(h);
|
|
357
|
+
write_index_header(idx, f);
|
|
358
|
+
write_ResidualQuantizer(&idxr->rq, f);
|
|
359
|
+
WRITE1(idxr->beam_factor);
|
|
360
|
+
} else if (
|
|
361
|
+
const Index2Layer* idxp = dynamic_cast<const Index2Layer*>(idx)) {
|
|
362
|
+
uint32_t h = fourcc("Ix2L");
|
|
363
|
+
WRITE1(h);
|
|
364
|
+
write_index_header(idx, f);
|
|
365
|
+
write_index(idxp->q1.quantizer, f);
|
|
366
|
+
WRITE1(idxp->q1.nlist);
|
|
367
|
+
WRITE1(idxp->q1.quantizer_trains_alone);
|
|
368
|
+
write_ProductQuantizer(&idxp->pq, f);
|
|
369
|
+
WRITE1(idxp->code_size_1);
|
|
370
|
+
WRITE1(idxp->code_size_2);
|
|
371
|
+
WRITE1(idxp->code_size);
|
|
372
|
+
WRITEVECTOR(idxp->codes);
|
|
373
|
+
} else if (
|
|
374
|
+
const IndexScalarQuantizer* idxs =
|
|
375
|
+
dynamic_cast<const IndexScalarQuantizer*>(idx)) {
|
|
376
|
+
uint32_t h = fourcc("IxSQ");
|
|
377
|
+
WRITE1(h);
|
|
378
|
+
write_index_header(idx, f);
|
|
379
|
+
write_ScalarQuantizer(&idxs->sq, f);
|
|
380
|
+
WRITEVECTOR(idxs->codes);
|
|
381
|
+
} else if (
|
|
382
|
+
const IndexLattice* idxl = dynamic_cast<const IndexLattice*>(idx)) {
|
|
383
|
+
uint32_t h = fourcc("IxLa");
|
|
384
|
+
WRITE1(h);
|
|
385
|
+
WRITE1(idxl->d);
|
|
386
|
+
WRITE1(idxl->nsq);
|
|
387
|
+
WRITE1(idxl->scale_nbit);
|
|
388
|
+
WRITE1(idxl->zn_sphere_codec.r2);
|
|
389
|
+
write_index_header(idx, f);
|
|
390
|
+
WRITEVECTOR(idxl->trained);
|
|
391
|
+
} else if (
|
|
392
|
+
const IndexIVFFlatDedup* ivfl =
|
|
393
|
+
dynamic_cast<const IndexIVFFlatDedup*>(idx)) {
|
|
394
|
+
uint32_t h = fourcc("IwFd");
|
|
395
|
+
WRITE1(h);
|
|
396
|
+
write_ivf_header(ivfl, f);
|
|
326
397
|
{
|
|
327
|
-
std::vector<Index::idx_t> tab
|
|
398
|
+
std::vector<Index::idx_t> tab(2 * ivfl->instances.size());
|
|
328
399
|
long i = 0;
|
|
329
|
-
for (auto it = ivfl->instances.begin();
|
|
330
|
-
|
|
400
|
+
for (auto it = ivfl->instances.begin(); it != ivfl->instances.end();
|
|
401
|
+
++it) {
|
|
331
402
|
tab[i++] = it->first;
|
|
332
403
|
tab[i++] = it->second;
|
|
333
404
|
}
|
|
334
|
-
WRITEVECTOR
|
|
405
|
+
WRITEVECTOR(tab);
|
|
335
406
|
}
|
|
336
|
-
write_InvertedLists
|
|
337
|
-
} else if(
|
|
338
|
-
|
|
339
|
-
uint32_t h = fourcc
|
|
340
|
-
WRITE1
|
|
341
|
-
write_ivf_header
|
|
342
|
-
write_InvertedLists
|
|
343
|
-
} else if(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
WRITE1
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
WRITE1
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
WRITE1
|
|
372
|
-
|
|
373
|
-
|
|
407
|
+
write_InvertedLists(ivfl->invlists, f);
|
|
408
|
+
} else if (
|
|
409
|
+
const IndexIVFFlat* ivfl = dynamic_cast<const IndexIVFFlat*>(idx)) {
|
|
410
|
+
uint32_t h = fourcc("IwFl");
|
|
411
|
+
WRITE1(h);
|
|
412
|
+
write_ivf_header(ivfl, f);
|
|
413
|
+
write_InvertedLists(ivfl->invlists, f);
|
|
414
|
+
} else if (
|
|
415
|
+
const IndexIVFScalarQuantizer* ivsc =
|
|
416
|
+
dynamic_cast<const IndexIVFScalarQuantizer*>(idx)) {
|
|
417
|
+
uint32_t h = fourcc("IwSq");
|
|
418
|
+
WRITE1(h);
|
|
419
|
+
write_ivf_header(ivsc, f);
|
|
420
|
+
write_ScalarQuantizer(&ivsc->sq, f);
|
|
421
|
+
WRITE1(ivsc->code_size);
|
|
422
|
+
WRITE1(ivsc->by_residual);
|
|
423
|
+
write_InvertedLists(ivsc->invlists, f);
|
|
424
|
+
} else if (
|
|
425
|
+
const IndexIVFSpectralHash* ivsp =
|
|
426
|
+
dynamic_cast<const IndexIVFSpectralHash*>(idx)) {
|
|
427
|
+
uint32_t h = fourcc("IwSh");
|
|
428
|
+
WRITE1(h);
|
|
429
|
+
write_ivf_header(ivsp, f);
|
|
430
|
+
write_VectorTransform(ivsp->vt, f);
|
|
431
|
+
WRITE1(ivsp->nbit);
|
|
432
|
+
WRITE1(ivsp->period);
|
|
433
|
+
WRITE1(ivsp->threshold_type);
|
|
434
|
+
WRITEVECTOR(ivsp->trained);
|
|
435
|
+
write_InvertedLists(ivsp->invlists, f);
|
|
436
|
+
} else if (const IndexIVFPQ* ivpq = dynamic_cast<const IndexIVFPQ*>(idx)) {
|
|
437
|
+
const IndexIVFPQR* ivfpqr = dynamic_cast<const IndexIVFPQR*>(idx);
|
|
438
|
+
|
|
439
|
+
uint32_t h = fourcc(ivfpqr ? "IwQR" : "IwPQ");
|
|
440
|
+
WRITE1(h);
|
|
441
|
+
write_ivf_header(ivpq, f);
|
|
442
|
+
WRITE1(ivpq->by_residual);
|
|
443
|
+
WRITE1(ivpq->code_size);
|
|
444
|
+
write_ProductQuantizer(&ivpq->pq, f);
|
|
445
|
+
write_InvertedLists(ivpq->invlists, f);
|
|
374
446
|
if (ivfpqr) {
|
|
375
|
-
write_ProductQuantizer
|
|
376
|
-
WRITEVECTOR
|
|
377
|
-
WRITE1
|
|
447
|
+
write_ProductQuantizer(&ivfpqr->refine_pq, f);
|
|
448
|
+
WRITEVECTOR(ivfpqr->refine_codes);
|
|
449
|
+
WRITE1(ivfpqr->k_factor);
|
|
378
450
|
}
|
|
379
451
|
|
|
380
|
-
} else if(
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
452
|
+
} else if (
|
|
453
|
+
const IndexPreTransform* ixpt =
|
|
454
|
+
dynamic_cast<const IndexPreTransform*>(idx)) {
|
|
455
|
+
uint32_t h = fourcc("IxPT");
|
|
456
|
+
WRITE1(h);
|
|
457
|
+
write_index_header(ixpt, f);
|
|
385
458
|
int nt = ixpt->chain.size();
|
|
386
|
-
WRITE1
|
|
459
|
+
WRITE1(nt);
|
|
387
460
|
for (int i = 0; i < nt; i++)
|
|
388
|
-
write_VectorTransform
|
|
389
|
-
write_index
|
|
390
|
-
} else if(
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
write_index
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
461
|
+
write_VectorTransform(ixpt->chain[i], f);
|
|
462
|
+
write_index(ixpt->index, f);
|
|
463
|
+
} else if (
|
|
464
|
+
const MultiIndexQuantizer* imiq =
|
|
465
|
+
dynamic_cast<const MultiIndexQuantizer*>(idx)) {
|
|
466
|
+
uint32_t h = fourcc("Imiq");
|
|
467
|
+
WRITE1(h);
|
|
468
|
+
write_index_header(imiq, f);
|
|
469
|
+
write_ProductQuantizer(&imiq->pq, f);
|
|
470
|
+
} else if (
|
|
471
|
+
const IndexRefine* idxrf = dynamic_cast<const IndexRefine*>(idx)) {
|
|
472
|
+
uint32_t h = fourcc("IxRF");
|
|
473
|
+
WRITE1(h);
|
|
474
|
+
write_index_header(idxrf, f);
|
|
475
|
+
write_index(idxrf->base_index, f);
|
|
476
|
+
write_index(idxrf->refine_index, f);
|
|
477
|
+
WRITE1(idxrf->k_factor);
|
|
478
|
+
} else if (
|
|
479
|
+
const IndexIDMap* idxmap = dynamic_cast<const IndexIDMap*>(idx)) {
|
|
480
|
+
uint32_t h = dynamic_cast<const IndexIDMap2*>(idx) ? fourcc("IxM2")
|
|
481
|
+
: fourcc("IxMp");
|
|
409
482
|
// no need to store additional info for IndexIDMap2
|
|
410
|
-
WRITE1
|
|
411
|
-
write_index_header
|
|
412
|
-
write_index
|
|
413
|
-
WRITEVECTOR
|
|
414
|
-
} else if(const IndexHNSW
|
|
415
|
-
|
|
483
|
+
WRITE1(h);
|
|
484
|
+
write_index_header(idxmap, f);
|
|
485
|
+
write_index(idxmap->index, f);
|
|
486
|
+
WRITEVECTOR(idxmap->id_map);
|
|
487
|
+
} else if (const IndexHNSW* idxhnsw = dynamic_cast<const IndexHNSW*>(idx)) {
|
|
488
|
+
uint32_t h = dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf")
|
|
489
|
+
: dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp")
|
|
490
|
+
: dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs")
|
|
491
|
+
: dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2")
|
|
492
|
+
: 0;
|
|
493
|
+
FAISS_THROW_IF_NOT(h != 0);
|
|
494
|
+
WRITE1(h);
|
|
495
|
+
write_index_header(idxhnsw, f);
|
|
496
|
+
write_HNSW(&idxhnsw->hnsw, f);
|
|
497
|
+
write_index(idxhnsw->storage, f);
|
|
498
|
+
} else if (const IndexNSG* idxnsg = dynamic_cast<const IndexNSG*>(idx)) {
|
|
416
499
|
uint32_t h =
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
WRITE1
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
500
|
+
dynamic_cast<const IndexNSGFlat*>(idx) ? fourcc("INSf") : 0;
|
|
501
|
+
FAISS_THROW_IF_NOT(h != 0);
|
|
502
|
+
WRITE1(h);
|
|
503
|
+
write_index_header(idxnsg, f);
|
|
504
|
+
WRITE1(idxnsg->GK);
|
|
505
|
+
WRITE1(idxnsg->build_type);
|
|
506
|
+
WRITE1(idxnsg->nndescent_S);
|
|
507
|
+
WRITE1(idxnsg->nndescent_R);
|
|
508
|
+
WRITE1(idxnsg->nndescent_L);
|
|
509
|
+
WRITE1(idxnsg->nndescent_iter);
|
|
510
|
+
write_NSG(&idxnsg->nsg, f);
|
|
511
|
+
write_index(idxnsg->storage, f);
|
|
512
|
+
} else if (
|
|
513
|
+
const IndexPQFastScan* idxpqfs =
|
|
514
|
+
dynamic_cast<const IndexPQFastScan*>(idx)) {
|
|
429
515
|
uint32_t h = fourcc("IPfs");
|
|
430
|
-
WRITE1
|
|
431
|
-
write_index_header
|
|
432
|
-
write_ProductQuantizer
|
|
433
|
-
WRITE1
|
|
434
|
-
WRITE1
|
|
435
|
-
WRITE1
|
|
436
|
-
WRITE1
|
|
437
|
-
WRITE1
|
|
438
|
-
WRITEVECTOR
|
|
439
|
-
} else if (
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
WRITE1
|
|
446
|
-
WRITE1
|
|
447
|
-
WRITE1
|
|
448
|
-
WRITE1
|
|
449
|
-
WRITE1
|
|
450
|
-
|
|
451
|
-
|
|
516
|
+
WRITE1(h);
|
|
517
|
+
write_index_header(idxpqfs, f);
|
|
518
|
+
write_ProductQuantizer(&idxpqfs->pq, f);
|
|
519
|
+
WRITE1(idxpqfs->implem);
|
|
520
|
+
WRITE1(idxpqfs->bbs);
|
|
521
|
+
WRITE1(idxpqfs->qbs);
|
|
522
|
+
WRITE1(idxpqfs->ntotal2);
|
|
523
|
+
WRITE1(idxpqfs->M2);
|
|
524
|
+
WRITEVECTOR(idxpqfs->codes);
|
|
525
|
+
} else if (
|
|
526
|
+
const IndexIVFPQFastScan* ivpq =
|
|
527
|
+
dynamic_cast<const IndexIVFPQFastScan*>(idx)) {
|
|
528
|
+
uint32_t h = fourcc("IwPf");
|
|
529
|
+
WRITE1(h);
|
|
530
|
+
write_ivf_header(ivpq, f);
|
|
531
|
+
WRITE1(ivpq->by_residual);
|
|
532
|
+
WRITE1(ivpq->code_size);
|
|
533
|
+
WRITE1(ivpq->bbs);
|
|
534
|
+
WRITE1(ivpq->M2);
|
|
535
|
+
WRITE1(ivpq->implem);
|
|
536
|
+
WRITE1(ivpq->qbs2);
|
|
537
|
+
write_ProductQuantizer(&ivpq->pq, f);
|
|
538
|
+
write_InvertedLists(ivpq->invlists, f);
|
|
452
539
|
} else {
|
|
453
|
-
FAISS_THROW_MSG
|
|
540
|
+
FAISS_THROW_MSG("don't know how to serialize this type of index");
|
|
454
541
|
}
|
|
455
542
|
}
|
|
456
543
|
|
|
457
|
-
void write_index
|
|
544
|
+
void write_index(const Index* idx, FILE* f) {
|
|
458
545
|
FileIOWriter writer(f);
|
|
459
|
-
write_index
|
|
546
|
+
write_index(idx, &writer);
|
|
460
547
|
}
|
|
461
548
|
|
|
462
|
-
void write_index
|
|
549
|
+
void write_index(const Index* idx, const char* fname) {
|
|
463
550
|
FileIOWriter writer(fname);
|
|
464
|
-
write_index
|
|
551
|
+
write_index(idx, &writer);
|
|
465
552
|
}
|
|
466
553
|
|
|
467
|
-
void write_VectorTransform
|
|
554
|
+
void write_VectorTransform(const VectorTransform* vt, const char* fname) {
|
|
468
555
|
FileIOWriter writer(fname);
|
|
469
|
-
write_VectorTransform
|
|
556
|
+
write_VectorTransform(vt, &writer);
|
|
470
557
|
}
|
|
471
558
|
|
|
472
|
-
|
|
473
559
|
/*************************************************************
|
|
474
560
|
* Write binary indexes
|
|
475
561
|
**************************************************************/
|
|
476
562
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
WRITE1
|
|
480
|
-
WRITE1
|
|
481
|
-
WRITE1
|
|
482
|
-
WRITE1
|
|
483
|
-
WRITE1 (idx->metric_type);
|
|
563
|
+
static void write_index_binary_header(const IndexBinary* idx, IOWriter* f) {
|
|
564
|
+
WRITE1(idx->d);
|
|
565
|
+
WRITE1(idx->code_size);
|
|
566
|
+
WRITE1(idx->ntotal);
|
|
567
|
+
WRITE1(idx->is_trained);
|
|
568
|
+
WRITE1(idx->metric_type);
|
|
484
569
|
}
|
|
485
570
|
|
|
486
|
-
static void write_binary_ivf_header
|
|
487
|
-
write_index_binary_header
|
|
488
|
-
WRITE1
|
|
489
|
-
WRITE1
|
|
490
|
-
write_index_binary
|
|
491
|
-
write_direct_map
|
|
571
|
+
static void write_binary_ivf_header(const IndexBinaryIVF* ivf, IOWriter* f) {
|
|
572
|
+
write_index_binary_header(ivf, f);
|
|
573
|
+
WRITE1(ivf->nlist);
|
|
574
|
+
WRITE1(ivf->nprobe);
|
|
575
|
+
write_index_binary(ivf->quantizer, f);
|
|
576
|
+
write_direct_map(&ivf->direct_map, f);
|
|
492
577
|
}
|
|
493
578
|
|
|
494
|
-
static void write_binary_hash_invlists
|
|
495
|
-
const IndexBinaryHash::InvertedListMap
|
|
496
|
-
int b,
|
|
497
|
-
{
|
|
579
|
+
static void write_binary_hash_invlists(
|
|
580
|
+
const IndexBinaryHash::InvertedListMap& invlists,
|
|
581
|
+
int b,
|
|
582
|
+
IOWriter* f) {
|
|
498
583
|
size_t sz = invlists.size();
|
|
499
|
-
WRITE1
|
|
584
|
+
WRITE1(sz);
|
|
500
585
|
size_t maxil = 0;
|
|
501
586
|
for (auto it = invlists.begin(); it != invlists.end(); ++it) {
|
|
502
|
-
if(it->second.ids.size() > maxil) {
|
|
587
|
+
if (it->second.ids.size() > maxil) {
|
|
503
588
|
maxil = it->second.ids.size();
|
|
504
589
|
}
|
|
505
590
|
}
|
|
506
591
|
int il_nbit = 0;
|
|
507
|
-
while(maxil >= ((uint64_t)1 << il_nbit)) {
|
|
592
|
+
while (maxil >= ((uint64_t)1 << il_nbit)) {
|
|
508
593
|
il_nbit++;
|
|
509
594
|
}
|
|
510
595
|
WRITE1(il_nbit);
|
|
@@ -513,25 +598,25 @@ static void write_binary_hash_invlists (
|
|
|
513
598
|
// memmap it at some point
|
|
514
599
|
|
|
515
600
|
// buffer for bitstrings
|
|
516
|
-
std::vector<uint8_t> buf
|
|
517
|
-
BitstringWriter wr
|
|
601
|
+
std::vector<uint8_t> buf(((b + il_nbit) * sz + 7) / 8);
|
|
602
|
+
BitstringWriter wr(buf.data(), buf.size());
|
|
518
603
|
for (auto it = invlists.begin(); it != invlists.end(); ++it) {
|
|
519
|
-
wr.write
|
|
520
|
-
wr.write
|
|
604
|
+
wr.write(it->first, b);
|
|
605
|
+
wr.write(it->second.ids.size(), il_nbit);
|
|
521
606
|
}
|
|
522
|
-
WRITEVECTOR
|
|
607
|
+
WRITEVECTOR(buf);
|
|
523
608
|
|
|
524
609
|
for (auto it = invlists.begin(); it != invlists.end(); ++it) {
|
|
525
|
-
WRITEVECTOR
|
|
526
|
-
WRITEVECTOR
|
|
610
|
+
WRITEVECTOR(it->second.ids);
|
|
611
|
+
WRITEVECTOR(it->second.vecs);
|
|
527
612
|
}
|
|
528
613
|
}
|
|
529
614
|
|
|
530
615
|
static void write_binary_multi_hash_map(
|
|
531
|
-
const IndexBinaryMultiHash::Map
|
|
532
|
-
int b,
|
|
533
|
-
|
|
534
|
-
{
|
|
616
|
+
const IndexBinaryMultiHash::Map& map,
|
|
617
|
+
int b,
|
|
618
|
+
size_t ntotal,
|
|
619
|
+
IOWriter* f) {
|
|
535
620
|
int id_bits = 0;
|
|
536
621
|
while ((ntotal > ((Index::idx_t)1 << id_bits))) {
|
|
537
622
|
id_bits++;
|
|
@@ -541,7 +626,7 @@ static void write_binary_multi_hash_map(
|
|
|
541
626
|
WRITE1(sz);
|
|
542
627
|
size_t nbit = (b + id_bits) * sz + ntotal * id_bits;
|
|
543
628
|
std::vector<uint8_t> buf((nbit + 7) / 8);
|
|
544
|
-
BitstringWriter wr
|
|
629
|
+
BitstringWriter wr(buf.data(), buf.size());
|
|
545
630
|
for (auto it = map.begin(); it != map.end(); ++it) {
|
|
546
631
|
wr.write(it->first, b);
|
|
547
632
|
wr.write(it->second.size(), id_bits);
|
|
@@ -549,80 +634,85 @@ static void write_binary_multi_hash_map(
|
|
|
549
634
|
wr.write(id, id_bits);
|
|
550
635
|
}
|
|
551
636
|
}
|
|
552
|
-
WRITEVECTOR
|
|
637
|
+
WRITEVECTOR(buf);
|
|
553
638
|
}
|
|
554
639
|
|
|
555
|
-
void write_index_binary
|
|
556
|
-
if (const IndexBinaryFlat
|
|
557
|
-
|
|
558
|
-
uint32_t h = fourcc
|
|
559
|
-
WRITE1
|
|
560
|
-
write_index_binary_header
|
|
561
|
-
WRITEVECTOR
|
|
562
|
-
} else if (
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
640
|
+
void write_index_binary(const IndexBinary* idx, IOWriter* f) {
|
|
641
|
+
if (const IndexBinaryFlat* idxf =
|
|
642
|
+
dynamic_cast<const IndexBinaryFlat*>(idx)) {
|
|
643
|
+
uint32_t h = fourcc("IBxF");
|
|
644
|
+
WRITE1(h);
|
|
645
|
+
write_index_binary_header(idx, f);
|
|
646
|
+
WRITEVECTOR(idxf->xb);
|
|
647
|
+
} else if (
|
|
648
|
+
const IndexBinaryIVF* ivf =
|
|
649
|
+
dynamic_cast<const IndexBinaryIVF*>(idx)) {
|
|
650
|
+
uint32_t h = fourcc("IBwF");
|
|
651
|
+
WRITE1(h);
|
|
652
|
+
write_binary_ivf_header(ivf, f);
|
|
653
|
+
write_InvertedLists(ivf->invlists, f);
|
|
654
|
+
} else if (
|
|
655
|
+
const IndexBinaryFromFloat* idxff =
|
|
656
|
+
dynamic_cast<const IndexBinaryFromFloat*>(idx)) {
|
|
657
|
+
uint32_t h = fourcc("IBFf");
|
|
658
|
+
WRITE1(h);
|
|
659
|
+
write_index_binary_header(idxff, f);
|
|
660
|
+
write_index(idxff->index, f);
|
|
661
|
+
} else if (
|
|
662
|
+
const IndexBinaryHNSW* idxhnsw =
|
|
663
|
+
dynamic_cast<const IndexBinaryHNSW*>(idx)) {
|
|
664
|
+
uint32_t h = fourcc("IBHf");
|
|
665
|
+
WRITE1(h);
|
|
666
|
+
write_index_binary_header(idxhnsw, f);
|
|
667
|
+
write_HNSW(&idxhnsw->hnsw, f);
|
|
668
|
+
write_index_binary(idxhnsw->storage, f);
|
|
669
|
+
} else if (
|
|
670
|
+
const IndexBinaryIDMap* idxmap =
|
|
671
|
+
dynamic_cast<const IndexBinaryIDMap*>(idx)) {
|
|
672
|
+
uint32_t h = dynamic_cast<const IndexBinaryIDMap2*>(idx)
|
|
673
|
+
? fourcc("IBM2")
|
|
674
|
+
: fourcc("IBMp");
|
|
586
675
|
// no need to store additional info for IndexIDMap2
|
|
587
|
-
WRITE1
|
|
588
|
-
write_index_binary_header
|
|
589
|
-
write_index_binary
|
|
590
|
-
WRITEVECTOR
|
|
591
|
-
} else if (
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
WRITE1
|
|
676
|
+
WRITE1(h);
|
|
677
|
+
write_index_binary_header(idxmap, f);
|
|
678
|
+
write_index_binary(idxmap->index, f);
|
|
679
|
+
WRITEVECTOR(idxmap->id_map);
|
|
680
|
+
} else if (
|
|
681
|
+
const IndexBinaryHash* idxh =
|
|
682
|
+
dynamic_cast<const IndexBinaryHash*>(idx)) {
|
|
683
|
+
uint32_t h = fourcc("IBHh");
|
|
684
|
+
WRITE1(h);
|
|
685
|
+
write_index_binary_header(idxh, f);
|
|
686
|
+
WRITE1(idxh->b);
|
|
687
|
+
WRITE1(idxh->nflip);
|
|
598
688
|
write_binary_hash_invlists(idxh->invlists, idxh->b, f);
|
|
599
|
-
} else if (
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
WRITE1
|
|
607
|
-
WRITE1
|
|
689
|
+
} else if (
|
|
690
|
+
const IndexBinaryMultiHash* idxmh =
|
|
691
|
+
dynamic_cast<const IndexBinaryMultiHash*>(idx)) {
|
|
692
|
+
uint32_t h = fourcc("IBHm");
|
|
693
|
+
WRITE1(h);
|
|
694
|
+
write_index_binary_header(idxmh, f);
|
|
695
|
+
write_index_binary(idxmh->storage, f);
|
|
696
|
+
WRITE1(idxmh->b);
|
|
697
|
+
WRITE1(idxmh->nhash);
|
|
698
|
+
WRITE1(idxmh->nflip);
|
|
608
699
|
for (int i = 0; i < idxmh->nhash; i++) {
|
|
609
700
|
write_binary_multi_hash_map(
|
|
610
701
|
idxmh->maps[i], idxmh->b, idxmh->ntotal, f);
|
|
611
702
|
}
|
|
612
703
|
} else {
|
|
613
|
-
FAISS_THROW_MSG
|
|
704
|
+
FAISS_THROW_MSG("don't know how to serialize this type of index");
|
|
614
705
|
}
|
|
615
706
|
}
|
|
616
707
|
|
|
617
|
-
void write_index_binary
|
|
708
|
+
void write_index_binary(const IndexBinary* idx, FILE* f) {
|
|
618
709
|
FileIOWriter writer(f);
|
|
619
710
|
write_index_binary(idx, &writer);
|
|
620
711
|
}
|
|
621
712
|
|
|
622
|
-
void write_index_binary
|
|
713
|
+
void write_index_binary(const IndexBinary* idx, const char* fname) {
|
|
623
714
|
FileIOWriter writer(fname);
|
|
624
|
-
write_index_binary
|
|
715
|
+
write_index_binary(idx, &writer);
|
|
625
716
|
}
|
|
626
717
|
|
|
627
|
-
|
|
628
718
|
} // namespace faiss
|