faiss 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#pragma once
|
|
11
|
+
|
|
12
|
+
#include <faiss/IndexIVF.h>
|
|
13
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* The uniform quantizer has a range [vmin, vmax]. The range can be
|
|
20
|
+
* the same for all dimensions (uniform) or specific per dimension
|
|
21
|
+
* (default).
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
struct ScalarQuantizer {
|
|
25
|
+
|
|
26
|
+
enum QuantizerType {
|
|
27
|
+
QT_8bit, ///< 8 bits per component
|
|
28
|
+
QT_4bit, ///< 4 bits per component
|
|
29
|
+
QT_8bit_uniform, ///< same, shared range for all dimensions
|
|
30
|
+
QT_4bit_uniform,
|
|
31
|
+
QT_fp16,
|
|
32
|
+
QT_8bit_direct, /// fast indexing of uint8s
|
|
33
|
+
QT_6bit, ///< 6 bits per component
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
QuantizerType qtype;
|
|
37
|
+
|
|
38
|
+
/** The uniform encoder can estimate the range of representable
|
|
39
|
+
* values of the unform encoder using different statistics. Here
|
|
40
|
+
* rs = rangestat_arg */
|
|
41
|
+
|
|
42
|
+
// rangestat_arg.
|
|
43
|
+
enum RangeStat {
|
|
44
|
+
RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)]
|
|
45
|
+
RS_meanstd, ///< [mean - std * rs, mean + std * rs]
|
|
46
|
+
RS_quantiles, ///< [Q(rs), Q(1-rs)]
|
|
47
|
+
RS_optim, ///< alternate optimization of reconstruction error
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
RangeStat rangestat;
|
|
51
|
+
float rangestat_arg;
|
|
52
|
+
|
|
53
|
+
/// dimension of input vectors
|
|
54
|
+
size_t d;
|
|
55
|
+
|
|
56
|
+
/// bytes per vector
|
|
57
|
+
size_t code_size;
|
|
58
|
+
|
|
59
|
+
/// trained values (including the range)
|
|
60
|
+
std::vector<float> trained;
|
|
61
|
+
|
|
62
|
+
ScalarQuantizer (size_t d, QuantizerType qtype);
|
|
63
|
+
ScalarQuantizer ();
|
|
64
|
+
|
|
65
|
+
void train (size_t n, const float *x);
|
|
66
|
+
|
|
67
|
+
/// Used by an IVF index to train based on the residuals
|
|
68
|
+
void train_residual (size_t n,
|
|
69
|
+
const float *x,
|
|
70
|
+
Index *quantizer,
|
|
71
|
+
bool by_residual,
|
|
72
|
+
bool verbose);
|
|
73
|
+
|
|
74
|
+
/// same as compute_code for several vectors
|
|
75
|
+
void compute_codes (const float * x,
|
|
76
|
+
uint8_t * codes,
|
|
77
|
+
size_t n) const ;
|
|
78
|
+
|
|
79
|
+
/// decode a vector from a given code (or n vectors if third argument)
|
|
80
|
+
void decode (const uint8_t *code, float *x, size_t n) const;
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
/*****************************************************
|
|
84
|
+
* Objects that provide methods for encoding/decoding, distance
|
|
85
|
+
* computation and inverted list scanning
|
|
86
|
+
*****************************************************/
|
|
87
|
+
|
|
88
|
+
struct Quantizer {
|
|
89
|
+
// encodes one vector. Assumes code is filled with 0s on input!
|
|
90
|
+
virtual void encode_vector(const float *x, uint8_t *code) const = 0;
|
|
91
|
+
virtual void decode_vector(const uint8_t *code, float *x) const = 0;
|
|
92
|
+
|
|
93
|
+
virtual ~Quantizer() {}
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
Quantizer * select_quantizer() const;
|
|
97
|
+
|
|
98
|
+
struct SQDistanceComputer: DistanceComputer {
|
|
99
|
+
|
|
100
|
+
const float *q;
|
|
101
|
+
const uint8_t *codes;
|
|
102
|
+
size_t code_size;
|
|
103
|
+
|
|
104
|
+
SQDistanceComputer (): q(nullptr), codes (nullptr), code_size (0)
|
|
105
|
+
{}
|
|
106
|
+
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
|
|
110
|
+
const;
|
|
111
|
+
|
|
112
|
+
InvertedListScanner *select_InvertedListScanner
|
|
113
|
+
(MetricType mt, const Index *quantizer, bool store_pairs,
|
|
114
|
+
bool by_residual=false) const;
|
|
115
|
+
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
} // namespace faiss
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#include <faiss/impl/FaissAssert.h>
|
|
9
|
+
#include <exception>
|
|
10
|
+
#include <iostream>
|
|
11
|
+
|
|
12
|
+
namespace faiss {
|
|
13
|
+
|
|
14
|
+
template <typename IndexT>
|
|
15
|
+
ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
|
|
16
|
+
// 0 is default dimension
|
|
17
|
+
: ThreadedIndex(0, threaded) {
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
template <typename IndexT>
|
|
21
|
+
ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
|
|
22
|
+
: IndexT(d),
|
|
23
|
+
own_fields(false),
|
|
24
|
+
isThreaded_(threaded) {
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
template <typename IndexT>
|
|
28
|
+
ThreadedIndex<IndexT>::~ThreadedIndex() {
|
|
29
|
+
for (auto& p : indices_) {
|
|
30
|
+
if (isThreaded_) {
|
|
31
|
+
// should have worker thread
|
|
32
|
+
FAISS_ASSERT((bool) p.second);
|
|
33
|
+
|
|
34
|
+
// This will also flush all pending work
|
|
35
|
+
p.second->stop();
|
|
36
|
+
p.second->waitForThreadExit();
|
|
37
|
+
} else {
|
|
38
|
+
// should not have worker thread
|
|
39
|
+
FAISS_ASSERT(!(bool) p.second);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
if (own_fields) {
|
|
43
|
+
delete p.first;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
template <typename IndexT>
|
|
49
|
+
void ThreadedIndex<IndexT>::addIndex(IndexT* index) {
|
|
50
|
+
// We inherit the dimension from the first index added to us if we don't have
|
|
51
|
+
// a set dimension
|
|
52
|
+
if (indices_.empty() && this->d == 0) {
|
|
53
|
+
this->d = index->d;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// The new index must match our set dimension
|
|
57
|
+
FAISS_THROW_IF_NOT_FMT(this->d == index->d,
|
|
58
|
+
"addIndex: dimension mismatch for "
|
|
59
|
+
"newly added index; expecting dim %d, "
|
|
60
|
+
"new index has dim %d",
|
|
61
|
+
this->d, index->d);
|
|
62
|
+
|
|
63
|
+
if (!indices_.empty()) {
|
|
64
|
+
auto& existing = indices_.front().first;
|
|
65
|
+
|
|
66
|
+
FAISS_THROW_IF_NOT_MSG(index->metric_type == existing->metric_type,
|
|
67
|
+
"addIndex: newly added index is "
|
|
68
|
+
"of different metric type than old index");
|
|
69
|
+
|
|
70
|
+
// Make sure this index is not duplicated
|
|
71
|
+
for (auto& p : indices_) {
|
|
72
|
+
FAISS_THROW_IF_NOT_MSG(p.first != index,
|
|
73
|
+
"addIndex: attempting to add index "
|
|
74
|
+
"that is already in the collection");
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
indices_.emplace_back(
|
|
79
|
+
std::make_pair(
|
|
80
|
+
index,
|
|
81
|
+
std::unique_ptr<WorkerThread>(isThreaded_ ?
|
|
82
|
+
new WorkerThread : nullptr)));
|
|
83
|
+
|
|
84
|
+
onAfterAddIndex(index);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
template <typename IndexT>
|
|
88
|
+
void ThreadedIndex<IndexT>::removeIndex(IndexT* index) {
|
|
89
|
+
for (auto it = indices_.begin(); it != indices_.end(); ++it) {
|
|
90
|
+
if (it->first == index) {
|
|
91
|
+
// This is our index; stop the worker thread before removing it,
|
|
92
|
+
// to ensure that it has finished before function exit
|
|
93
|
+
if (isThreaded_) {
|
|
94
|
+
// should have worker thread
|
|
95
|
+
FAISS_ASSERT((bool) it->second);
|
|
96
|
+
it->second->stop();
|
|
97
|
+
it->second->waitForThreadExit();
|
|
98
|
+
} else {
|
|
99
|
+
// should not have worker thread
|
|
100
|
+
FAISS_ASSERT(!(bool) it->second);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
indices_.erase(it);
|
|
104
|
+
onAfterRemoveIndex(index);
|
|
105
|
+
|
|
106
|
+
if (own_fields) {
|
|
107
|
+
delete index;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
return;
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// could not find our index
|
|
115
|
+
FAISS_THROW_MSG("IndexReplicas::removeIndex: index not found");
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
template <typename IndexT>
|
|
119
|
+
void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
|
|
120
|
+
if (isThreaded_) {
|
|
121
|
+
std::vector<std::future<bool>> v;
|
|
122
|
+
|
|
123
|
+
for (int i = 0; i < this->indices_.size(); ++i) {
|
|
124
|
+
auto& p = this->indices_[i];
|
|
125
|
+
auto indexPtr = p.first;
|
|
126
|
+
v.emplace_back(p.second->add([f, i, indexPtr](){ f(i, indexPtr); }));
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
waitAndHandleFutures(v);
|
|
130
|
+
} else {
|
|
131
|
+
// Multiple exceptions may be thrown; gather them as we encounter them,
|
|
132
|
+
// while letting everything else run to completion
|
|
133
|
+
std::vector<std::pair<int, std::exception_ptr>> exceptions;
|
|
134
|
+
|
|
135
|
+
for (int i = 0; i < this->indices_.size(); ++i) {
|
|
136
|
+
auto& p = this->indices_[i];
|
|
137
|
+
try {
|
|
138
|
+
f(i, p.first);
|
|
139
|
+
} catch (...) {
|
|
140
|
+
exceptions.emplace_back(std::make_pair(i, std::current_exception()));
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
handleExceptions(exceptions);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
template <typename IndexT>
|
|
149
|
+
void ThreadedIndex<IndexT>::runOnIndex(
|
|
150
|
+
std::function<void(int, const IndexT*)> f) const {
|
|
151
|
+
const_cast<ThreadedIndex<IndexT>*>(this)->runOnIndex(
|
|
152
|
+
[f](int i, IndexT* idx){ f(i, idx); });
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
template <typename IndexT>
|
|
156
|
+
void ThreadedIndex<IndexT>::reset() {
|
|
157
|
+
runOnIndex([](int, IndexT* index){ index->reset(); });
|
|
158
|
+
this->ntotal = 0;
|
|
159
|
+
this->is_trained = false;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
template <typename IndexT>
|
|
163
|
+
void
|
|
164
|
+
ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* index) {
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
template <typename IndexT>
|
|
168
|
+
void
|
|
169
|
+
ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* index) {
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
template <typename IndexT>
|
|
173
|
+
void
|
|
174
|
+
ThreadedIndex<IndexT>::waitAndHandleFutures(std::vector<std::future<bool>>& v) {
|
|
175
|
+
// Blocking wait for completion for all of the indices, capturing any
|
|
176
|
+
// exceptions that are generated
|
|
177
|
+
std::vector<std::pair<int, std::exception_ptr>> exceptions;
|
|
178
|
+
|
|
179
|
+
for (int i = 0; i < v.size(); ++i) {
|
|
180
|
+
auto& fut = v[i];
|
|
181
|
+
|
|
182
|
+
try {
|
|
183
|
+
fut.get();
|
|
184
|
+
} catch (...) {
|
|
185
|
+
exceptions.emplace_back(std::make_pair(i, std::current_exception()));
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
handleExceptions(exceptions);
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
} // namespace
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#include <faiss/Index.h>
|
|
11
|
+
#include <faiss/IndexBinary.h>
|
|
12
|
+
#include <faiss/utils/WorkerThread.h>
|
|
13
|
+
#include <memory>
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
namespace faiss {
|
|
17
|
+
|
|
18
|
+
/// A holder of indices in a collection of threads
|
|
19
|
+
/// The interface to this class itself is not thread safe
|
|
20
|
+
template <typename IndexT>
|
|
21
|
+
class ThreadedIndex : public IndexT {
|
|
22
|
+
public:
|
|
23
|
+
explicit ThreadedIndex(bool threaded);
|
|
24
|
+
explicit ThreadedIndex(int d, bool threaded);
|
|
25
|
+
|
|
26
|
+
~ThreadedIndex() override;
|
|
27
|
+
|
|
28
|
+
/// override an index that is managed by ourselves.
|
|
29
|
+
/// WARNING: once an index is added, it becomes unsafe to touch it from any
|
|
30
|
+
/// other thread than that on which is managing it, until we are shut
|
|
31
|
+
/// down. Use runOnIndex to perform work on it instead.
|
|
32
|
+
void addIndex(IndexT* index);
|
|
33
|
+
|
|
34
|
+
/// Remove an index that is managed by ourselves.
|
|
35
|
+
/// This will flush all pending work on that index, and then shut
|
|
36
|
+
/// down its managing thread, and will remove the index.
|
|
37
|
+
void removeIndex(IndexT* index);
|
|
38
|
+
|
|
39
|
+
/// Run a function on all indices, in the thread that the index is
|
|
40
|
+
/// managed in.
|
|
41
|
+
/// Function arguments are (index in collection, index pointer)
|
|
42
|
+
void runOnIndex(std::function<void(int, IndexT*)> f);
|
|
43
|
+
void runOnIndex(std::function<void(int, const IndexT*)> f) const;
|
|
44
|
+
|
|
45
|
+
/// faiss::Index API
|
|
46
|
+
/// All indices receive the same call
|
|
47
|
+
void reset() override;
|
|
48
|
+
|
|
49
|
+
/// Returns the number of sub-indices
|
|
50
|
+
int count() const { return indices_.size(); }
|
|
51
|
+
|
|
52
|
+
/// Returns the i-th sub-index
|
|
53
|
+
IndexT* at(int i) { return indices_[i].first; }
|
|
54
|
+
|
|
55
|
+
/// Returns the i-th sub-index (const version)
|
|
56
|
+
const IndexT* at(int i) const { return indices_[i].first; }
|
|
57
|
+
|
|
58
|
+
/// Whether or not we are responsible for deleting our contained indices
|
|
59
|
+
bool own_fields;
|
|
60
|
+
|
|
61
|
+
protected:
|
|
62
|
+
/// Called just after an index is added
|
|
63
|
+
virtual void onAfterAddIndex(IndexT* index);
|
|
64
|
+
|
|
65
|
+
/// Called just after an index is removed
|
|
66
|
+
virtual void onAfterRemoveIndex(IndexT* index);
|
|
67
|
+
|
|
68
|
+
protected:
|
|
69
|
+
static void waitAndHandleFutures(std::vector<std::future<bool>>& v);
|
|
70
|
+
|
|
71
|
+
/// Collection of Index instances, with their managing worker thread if any
|
|
72
|
+
std::vector<std::pair<IndexT*, std::unique_ptr<WorkerThread>>> indices_;
|
|
73
|
+
|
|
74
|
+
/// Is this index multi-threaded?
|
|
75
|
+
bool isThreaded_;
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
} // namespace
|
|
79
|
+
|
|
80
|
+
#include <faiss/impl/ThreadedIndex-inl.h>
|
|
@@ -0,0 +1,793 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// -*- c++ -*-
|
|
9
|
+
|
|
10
|
+
#include <faiss/index_io.h>
|
|
11
|
+
|
|
12
|
+
#include <cstdio>
|
|
13
|
+
#include <cstdlib>
|
|
14
|
+
|
|
15
|
+
#include <sys/mman.h>
|
|
16
|
+
#include <sys/types.h>
|
|
17
|
+
#include <sys/stat.h>
|
|
18
|
+
#include <unistd.h>
|
|
19
|
+
|
|
20
|
+
#include <faiss/impl/FaissAssert.h>
|
|
21
|
+
#include <faiss/impl/io.h>
|
|
22
|
+
|
|
23
|
+
#include <faiss/IndexFlat.h>
|
|
24
|
+
#include <faiss/VectorTransform.h>
|
|
25
|
+
#include <faiss/IndexPreTransform.h>
|
|
26
|
+
#include <faiss/IndexLSH.h>
|
|
27
|
+
#include <faiss/IndexPQ.h>
|
|
28
|
+
#include <faiss/IndexIVF.h>
|
|
29
|
+
#include <faiss/IndexIVFPQ.h>
|
|
30
|
+
#include <faiss/IndexIVFPQR.h>
|
|
31
|
+
#include <faiss/Index2Layer.h>
|
|
32
|
+
#include <faiss/IndexIVFFlat.h>
|
|
33
|
+
#include <faiss/IndexIVFSpectralHash.h>
|
|
34
|
+
#include <faiss/MetaIndexes.h>
|
|
35
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
36
|
+
#include <faiss/IndexHNSW.h>
|
|
37
|
+
#include <faiss/IndexLattice.h>
|
|
38
|
+
|
|
39
|
+
#include <faiss/OnDiskInvertedLists.h>
|
|
40
|
+
#include <faiss/IndexBinaryFlat.h>
|
|
41
|
+
#include <faiss/IndexBinaryFromFloat.h>
|
|
42
|
+
#include <faiss/IndexBinaryHNSW.h>
|
|
43
|
+
#include <faiss/IndexBinaryIVF.h>
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
namespace faiss {
|
|
48
|
+
|
|
49
|
+
/*************************************************************
|
|
50
|
+
* I/O macros
|
|
51
|
+
*
|
|
52
|
+
* we use macros so that we have a line number to report in abort
|
|
53
|
+
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
|
54
|
+
* always called f and thus is not passed in as a macro parameter.
|
|
55
|
+
**************************************************************/
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
// Reads n items of type *(ptr) into ptr through the IOReader `f` that is
// in scope at the expansion site, and throws when fewer than n items were
// read. `ret` and `size_t(n)` are both size_t, hence the %zu conversions.
// Note that `n` is evaluated more than once.
#define READANDCHECK(ptr, n) {                                  \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);              \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                      \
            "read error in %s: %zu != %zu (%s)",                \
            f->name.c_str(), ret, size_t(n), strerror(errno));  \
    }

// Reads exactly one object into the lvalue x.
#define READ1(x)  READANDCHECK(&(x), 1)

// Reads a length-prefixed vector: first the element count (stored as a
// `long`), then that many elements into vec, which is resized to fit.
// Counts that are negative or not below 2^40 are rejected before resizing.
#define READVECTOR(vec) {                                       \
        long size;                                              \
        READANDCHECK (&size, 1);                                \
        FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40));    \
        (vec).resize (size);                                    \
        READANDCHECK ((vec).data (), size);                     \
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
/*************************************************************
|
|
79
|
+
* Read
|
|
80
|
+
**************************************************************/
|
|
81
|
+
|
|
82
|
+
/// Reads the fields shared by all Index objects from f into idx, in file
/// order: d, ntotal, two idx_t values that are read and discarded,
/// is_trained and metric_type. metric_arg is only present in the file when
/// metric_type > 1. verbose is not stored and is always set to false.
static void read_index_header (Index *idx, IOReader *f) {
    READ1 (idx->d);
    READ1 (idx->ntotal);

    // two header slots whose values are not used
    Index::idx_t discarded;
    for (int slot = 0; slot < 2; slot++) {
        READ1 (discarded);
    }

    READ1 (idx->is_trained);
    READ1 (idx->metric_type);

    const bool has_metric_arg = idx->metric_type > 1;
    if (has_metric_arg) {
        READ1 (idx->metric_arg);
    }

    idx->verbose = false;
}
|
|
95
|
+
|
|
96
|
+
/// Reads one VectorTransform from f.
///
/// The stream starts with a fourcc tag selecting the concrete type,
/// followed by the type-specific fields and finally d_in, d_out and
/// is_trained. An ITQTransform ("Viqt") embeds two further transforms,
/// which are read recursively and copied into it.
///
/// Returns a heap-allocated transform.
/// NOTE(review): presumably the caller takes ownership; confirm.
/// Throws on a read error, an unknown fourcc or an embedded transform of
/// the wrong type. Partially built objects are freed before the exception
/// leaves this function.
VectorTransform* read_VectorTransform (IOReader *f) {
    uint32_t h;
    READ1 (h);

    // vt owns the transform under construction. sub owns an embedded
    // transform while it is being copied into an ITQTransform. Both are
    // released by the catch block if anything below throws.
    // NOTE(review): deleting through VectorTransform* assumes a virtual
    // destructor, which returning derived objects through this pointer
    // type already relies on; confirm.
    VectorTransform *vt = nullptr;
    VectorTransform *sub = nullptr;

    try {
        if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
            h == fourcc ("LTra") || h == fourcc ("PcAm") ||
            h == fourcc ("Viqm")) {
            // Linear transforms: subtype-specific fields first, then the
            // fields common to every LinearTransform.
            LinearTransform *lt = nullptr;
            if (h == fourcc ("rrot")) {
                lt = new RandomRotationMatrix ();
                vt = lt;
            } else if (h == fourcc ("PCAm") ||
                       h == fourcc ("PcAm")) {
                PCAMatrix * pca = new PCAMatrix ();
                vt = lt = pca;
                READ1 (pca->eigen_power);
                READ1 (pca->random_rotation);
                // balanced_bins only exists in the "PcAm" layout
                if (h == fourcc ("PcAm")) {
                    READ1 (pca->balanced_bins);
                }
                READVECTOR (pca->mean);
                READVECTOR (pca->eigenvalues);
                READVECTOR (pca->PCAMat);
            } else if (h == fourcc ("Viqm")) {
                ITQMatrix *itqm = new ITQMatrix ();
                vt = lt = itqm;
                READ1 (itqm->max_iter);
                READ1 (itqm->seed);
            } else if (h == fourcc ("LTra")) {
                lt = new LinearTransform ();
                vt = lt;
            }
            READ1 (lt->have_bias);
            READVECTOR (lt->A);
            READVECTOR (lt->b);
            // NOTE(review): d_in and d_out are only read from the file at
            // the end of this function, so these two checks (and
            // set_is_orthonormal) see whatever the constructor set.
            // Confirm that this is intended.
            FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
            FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
            lt->set_is_orthonormal();
        } else if (h == fourcc ("RmDT")) {
            RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
            vt = rdt;
            READVECTOR (rdt->map);
        } else if (h == fourcc ("VNrm")) {
            NormalizationTransform *nt = new NormalizationTransform ();
            vt = nt;
            READ1 (nt->norm);
        } else if (h == fourcc ("VCnt")) {
            CenteringTransform *ct = new CenteringTransform ();
            vt = ct;
            READVECTOR (ct->mean);
        } else if (h == fourcc ("Viqt")) {
            ITQTransform *itqt = new ITQTransform ();
            vt = itqt;

            READVECTOR (itqt->mean);
            READ1 (itqt->do_pca);
            {
                // embedded ITQ matrix, copied by value into itqt
                sub = read_VectorTransform (f);
                ITQMatrix *itqm = dynamic_cast<ITQMatrix*> (sub);
                FAISS_THROW_IF_NOT(itqm);
                itqt->itq = *itqm;
                delete itqm;
                sub = nullptr;
            }
            {
                // embedded linear transform, copied by value into itqt
                sub = read_VectorTransform (f);
                LinearTransform *pi = dynamic_cast<LinearTransform*> (sub);
                FAISS_THROW_IF_NOT (pi);
                itqt->pca_then_itq = *pi;
                delete pi;
                sub = nullptr;
            }
        } else {
            FAISS_THROW_MSG("fourcc not recognized");
        }

        // trailer common to every transform type
        READ1 (vt->d_in);
        READ1 (vt->d_out);
        READ1 (vt->is_trained);
    } catch (...) {
        // do not leak half-read objects on a truncated or corrupt stream
        delete sub;
        delete vt;
        throw;
    }
    return vt;
}
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
/**
 * Read the per-list sizes of a serialized array of inverted lists.
 *
 * Two encodings are accepted, selected by a leading fourcc:
 *  - "full": one size per list; the stored vector must have exactly as
 *    many entries as `sizes` had on entry.
 *  - "sprs": a flat array of (list number, size) pairs; only the lists
 *    named in the file are overwritten in `sizes`.
 *
 * @param f      reader positioned on the list_type fourcc
 * @param sizes  in/out; must already hold one entry per list
 *
 * Throws (through the FAISS_THROW_* macros) on an unknown list_type, a
 * size mismatch, an out-of-range list number or a truncated pair array.
 */
static void read_ArrayInvertedLists_sizes (
        IOReader *f, std::vector<size_t> & sizes)
{
    uint32_t list_type;
    READ1(list_type);
    if (list_type == fourcc("full")) {
        size_t os = sizes.size();
        READVECTOR (sizes);
        FAISS_THROW_IF_NOT (os == sizes.size());
    } else if (list_type == fourcc("sprs")) {
        std::vector<size_t> idsizes;
        READVECTOR (idsizes);
        // entries come in (list number, size) pairs: an odd count would
        // make idsizes[j + 1] below read past the end of the vector
        FAISS_THROW_IF_NOT (idsizes.size() % 2 == 0);
        for (size_t j = 0; j < idsizes.size(); j += 2) {
            FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
            sizes[idsizes[j]] = idsizes[j + 1];
        }
    } else {
        FAISS_THROW_MSG ("invalid list_type");
    }
}
|
|
195
|
+
|
|
196
|
+
/**
 * Read a serialized InvertedLists object.
 *
 * The leading fourcc selects the representation:
 *  - "il00": no lists were stored; a warning is printed and nullptr returned.
 *  - "ilar" without IO_FLAG_MMAP: lists are loaded into an ArrayInvertedLists.
 *  - "ilar" with IO_FLAG_MMAP: the file is memory-mapped read-only and
 *    exposed as an OnDiskInvertedLists (requires a FileIOReader).
 *  - "ilod": an OnDiskInvertedLists descriptor is read and its data file
 *    is mapped via do_mmap().
 *
 * @param f         reader positioned on the fourcc
 * @param io_flags  bit mask of IO_FLAG_* values
 * @return          newly allocated lists owned by the caller, or nullptr
 *                  for "il00"
 *
 * Throws (through the FAISS_THROW_* macros) on read errors, unsupported
 * fourcc, or failing system calls.
 */
InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
    uint32_t h;
    READ1 (h);
    if (h == fourcc ("il00")) {
        fprintf(stderr, "read_InvertedLists:"
                " WARN! inverted lists not stored with IVF object\n");
        return nullptr;
    } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_MMAP)) {
        auto ails = new ArrayInvertedLists (0, 0);
        // free the partially filled object if any read below throws
        ScopeDeleter1<ArrayInvertedLists> del (ails);
        READ1 (ails->nlist);
        READ1 (ails->code_size);
        ails->ids.resize (ails->nlist);
        ails->codes.resize (ails->nlist);
        std::vector<size_t> sizes (ails->nlist);
        read_ArrayInvertedLists_sizes (f, sizes);
        for (size_t i = 0; i < ails->nlist; i++) {
            ails->ids[i].resize (sizes[i]);
            ails->codes[i].resize (sizes[i] * ails->code_size);
        }
        // for each non-empty list the codes are stored first, then the ids
        for (size_t i = 0; i < ails->nlist; i++) {
            size_t n = ails->ids[i].size();
            if (n > 0) {
                READANDCHECK (ails->codes[i].data(), n * ails->code_size);
                READANDCHECK (ails->ids[i].data(), n);
            }
        }
        del.release ();
        return ails;
    } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) {
        // then we load it as an OnDiskInvertedLists

        FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
        FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
        FILE *fdesc = reader->f;

        auto ails = new OnDiskInvertedLists ();
        READ1 (ails->nlist);
        READ1 (ails->code_size);
        ails->read_only = true;
        ails->lists.resize (ails->nlist);
        std::vector<size_t> sizes (ails->nlist);
        read_ArrayInvertedLists_sizes (f, sizes);

        // the list payload starts at the current file position; ftell
        // reports failure as -1, which must not be used as an offset
        long pos = ftell (fdesc);
        FAISS_THROW_IF_NOT_FMT (pos >= 0,
                                "ftell failed: %s", strerror(errno));
        size_t o = pos;
        { // do the mmap
            struct stat buf;
            int ret = fstat (fileno(fdesc), &buf);
            FAISS_THROW_IF_NOT_FMT (ret == 0,
                                    "fstat failed: %s", strerror(errno));
            ails->totsize = buf.st_size;
            ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
                                        PROT_READ, MAP_SHARED,
                                        fileno(fdesc), 0);
            FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
                                    "could not mmap: %s",
                                    strerror(errno));
        }

        // lists are laid out back to back; each entry takes one id plus
        // code_size bytes
        for (size_t i = 0; i < ails->nlist; i++) {
            OnDiskInvertedLists::List & l = ails->lists[i];
            l.size = l.capacity = sizes[i];
            l.offset = o;
            o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
                           ails->code_size);
        }
        FAISS_THROW_IF_NOT(o <= ails->totsize);
        // resume normal reading of file, just past the mapped lists
        int seek_ret = fseek (fdesc, o, SEEK_SET);
        FAISS_THROW_IF_NOT_FMT (seek_ret == 0,
                                "fseek failed: %s", strerror(errno));
        return ails;
    } else if (h == fourcc ("ilod")) {
        OnDiskInvertedLists *od = new OnDiskInvertedLists();
        od->read_only = io_flags & IO_FLAG_READ_ONLY;
        READ1 (od->nlist);
        READ1 (od->code_size);
        // this is a POD object
        READVECTOR (od->lists);
        {
            std::vector<OnDiskInvertedLists::Slot> v;
            READVECTOR(v);
            od->slots.assign(v.begin(), v.end());
        }
        {
            std::vector<char> x;
            READVECTOR(x);
            od->filename.assign(x.begin(), x.end());

            if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
                // replace the stored directory with the directory of the
                // index file being read, keeping only the base name
                FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
                FAISS_THROW_IF_NOT_MSG (
                    reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
                    "when reading from file");
                std::string indexname = reader->name;
                std::string dirname = "./";
                size_t slash = indexname.find_last_of('/');
                if (slash != std::string::npos) {
                    dirname = indexname.substr(0, slash + 1);
                }
                std::string filename = od->filename;
                slash = filename.find_last_of('/');
                if (slash != std::string::npos) {
                    filename = filename.substr(slash + 1);
                }
                filename = dirname + filename;
                printf("IO_FLAG_ONDISK_SAME_DIR: "
                       "updating ondisk filename from %s to %s\n",
                       od->filename.c_str(), filename.c_str());
                od->filename = filename;
            }

        }
        READ1(od->totsize);
        od->do_mmap();
        return od;
    } else {
        FAISS_THROW_MSG ("read_InvertedLists: unsupported invlist type");
    }
}
|
|
311
|
+
|
|
312
|
+
/**
 * Read inverted lists from f and attach them to an IndexIVF.
 *
 * The lists may be absent (nullptr); when present their nlist and
 * code_size must match the index. The index takes ownership.
 *
 * @param ivf       index that receives the lists
 * @param f         reader positioned on the inverted lists
 * @param io_flags  forwarded to read_InvertedLists (f, io_flags)
 */
static void read_InvertedLists (
        IndexIVF *ivf, IOReader *f, int io_flags) {
    InvertedLists *ils = read_InvertedLists (f, io_flags);
    // do not leak the freshly read lists if the consistency check throws
    ScopeDeleter1<InvertedLists> del (ils);
    FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
                                 ils->code_size == ivf->code_size));
    del.release ();
    ivf->invlists = ils;
    ivf->own_invlists = true;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Fill an existing ProductQuantizer from f.
 *
 * Reads d, M and nbits, lets the quantizer recompute its derived values,
 * then reads the centroid table.
 *
 * @param pq  quantizer to fill
 * @param f   reader positioned on the quantizer fields
 */
static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
    ProductQuantizer &quantizer = *pq;

    // basic parameters, in file order
    READ1 (quantizer.d);
    READ1 (quantizer.M);
    READ1 (quantizer.nbits);

    // derived values are recomputed rather than read from the file
    quantizer.set_derived_values ();

    READVECTOR (quantizer.centroids);
}
|
|
328
|
+
|
|
329
|
+
/**
 * Fill an existing ScalarQuantizer from f.
 *
 * Fields are read in file order: qtype, rangestat, rangestat_arg, d,
 * code_size and finally the trained vector.
 *
 * @param ivsc  quantizer to fill
 * @param f     reader positioned on the quantizer fields
 */
static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
    ScalarQuantizer &sq = *ivsc;

    READ1 (sq.qtype);
    READ1 (sq.rangestat);
    READ1 (sq.rangestat_arg);
    READ1 (sq.d);
    READ1 (sq.code_size);

    READVECTOR (sq.trained);
}
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
/**
 * Fill an existing HNSW structure from f.
 *
 * The five vectors are read first, followed by the scalar fields, in the
 * order they appear in the file.
 *
 * @param hnsw  structure to fill
 * @param f     reader positioned on the HNSW fields
 */
static void read_HNSW (HNSW *hnsw, IOReader *f) {
    HNSW &graph = *hnsw;

    // vector-valued fields
    READVECTOR (graph.assign_probas);
    READVECTOR (graph.cum_nneighbor_per_level);
    READVECTOR (graph.levels);
    READVECTOR (graph.offsets);
    READVECTOR (graph.neighbors);

    // scalar fields
    READ1 (graph.entry_point);
    READ1 (graph.max_level);
    READ1 (graph.efConstruction);
    READ1 (graph.efSearch);
    READ1 (graph.upper_beam);
}
|
|
352
|
+
|
|
353
|
+
/**
 * Read a ProductQuantizer from the file named fname.
 *
 * @param fname  path handed to FileIOReader
 * @return       result of read_ProductQuantizer on that reader
 */
ProductQuantizer * read_ProductQuantizer (const char*fname) {
    FileIOReader reader(fname);
    ProductQuantizer *pq = read_ProductQuantizer(&reader);
    return pq;
}
|
|
357
|
+
|
|
358
|
+
/**
 * Allocate a ProductQuantizer and fill it from reader.
 *
 * The new object is held by a scope guard while it is being read, so it
 * is deleted if reading throws.
 *
 * @param reader  reader positioned on the quantizer fields
 * @return        newly allocated quantizer, owned by the caller
 */
ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
    ProductQuantizer *result = new ProductQuantizer();
    ScopeDeleter1<ProductQuantizer> guard (result);

    read_ProductQuantizer(result, reader);

    // reading succeeded: hand ownership to the caller
    guard.release ();
    return result;
}
|
|
366
|
+
|
|
367
|
+
/**
 * Read the part of an IndexIVF that is common to all IVF index types.
 *
 * Reads the generic index header, nlist, nprobe and the coarse quantizer
 * (which the index then owns), optionally the per-list id vectors of the
 * legacy formats, and finally the direct map fields.
 *
 * @param ivf  index to fill
 * @param f    reader positioned on the IVF header
 * @param ids  when non-null, resized to nlist and filled with one id
 *             vector per list (legacy "Iv" formats)
 */
static void read_ivf_header (
    IndexIVF *ivf, IOReader *f,
    std::vector<std::vector<Index::idx_t> > *ids = nullptr)
{
    read_index_header (ivf, f);
    READ1 (ivf->nlist);
    READ1 (ivf->nprobe);

    ivf->quantizer = read_index (f);
    ivf->own_fields = true;

    if (ids != nullptr) { // used in legacy "Iv" formats
        ids->resize (ivf->nlist);
        for (auto &list_ids : *ids) {
            READVECTOR (list_ids);
        }
    }

    READ1 (ivf->maintain_direct_map);
    READVECTOR (ivf->direct_map);
}
|
|
384
|
+
|
|
385
|
+
// used for legacy formats
|
|
386
|
+
static ArrayInvertedLists *set_array_invlist(
|
|
387
|
+
IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
|
|
388
|
+
{
|
|
389
|
+
ArrayInvertedLists *ail = new ArrayInvertedLists (
|
|
390
|
+
ivf->nlist, ivf->code_size);
|
|
391
|
+
std::swap (ail->ids, ids);
|
|
392
|
+
ivf->invlists = ail;
|
|
393
|
+
ivf->own_invlists = true;
|
|
394
|
+
return ail;
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
/**
 * Read an IndexIVFPQ or IndexIVFPQR.
 *
 * The fourcc h decides two things independently:
 *  - "IvQR" / "IvPQ" use the legacy layout, where ids sit in the IVF
 *    header and codes follow as one vector per list;
 *  - "IvQR" / "IwQR" describe an IndexIVFPQR with refinement data.
 *
 * @param f         reader positioned just after the fourcc
 * @param h         fourcc that was read by the caller
 * @param io_flags  forwarded to read_InvertedLists
 * @return          newly allocated index, owned by the caller
 */
static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
{
    const bool is_legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
    const bool has_refine = h == fourcc ("IvQR") || h == fourcc ("IwQR");

    IndexIVFPQR *refined = nullptr;
    IndexIVFPQ *ivfpq = nullptr;
    if (has_refine) {
        refined = new IndexIVFPQR ();
        ivfpq = refined;
    } else {
        ivfpq = new IndexIVFPQ ();
    }

    std::vector<std::vector<Index::idx_t> > legacy_ids;
    read_ivf_header (ivfpq, f, is_legacy ? &legacy_ids : nullptr);
    READ1 (ivfpq->by_residual);
    READ1 (ivfpq->code_size);
    read_ProductQuantizer (&ivfpq->pq, f);

    if (!is_legacy) {
        read_InvertedLists (ivfpq, f, io_flags);
    } else {
        ArrayInvertedLists *lists = set_array_invlist (ivfpq, legacy_ids);
        for (size_t list_no = 0; list_no < lists->nlist; list_no++) {
            READVECTOR (lists->codes[list_no]);
        }
    }

    // nothing more is read for an untrained index
    if (!ivfpq->is_trained) {
        return ivfpq;
    }

    // precomputed table not stored. It is cheaper to recompute it
    ivfpq->use_precomputed_table = 0;
    if (ivfpq->by_residual) {
        ivfpq->precompute_table ();
    }

    if (refined) {
        read_ProductQuantizer (&refined->refine_pq, f);
        READVECTOR (refined->refine_codes);
        READ1 (refined->k_factor);
    }
    return ivfpq;
}
|
|
433
|
+
|
|
434
|
+
// Compatibility switch for old files: when set to 2, read_index() assumes an
// "IxPT" (IndexPreTransform) holds exactly one transform instead of reading
// the transform count from the file.
int read_old_fmt_hack = 0;
|
|
435
|
+
|
|
436
|
+
/**
 * Read a serialized Index from f.
 *
 * A leading fourcc selects the concrete index type; the matching branch
 * allocates the object, reads its fields in file order and, for composite
 * indexes, recurses to read the sub-indexes.
 *
 * @param f         reader positioned on the fourcc
 * @param io_flags  bit mask of IO_FLAG_* values, forwarded to the
 *                  inverted-list and sub-index readers
 * @return          newly allocated index, owned by the caller
 *
 * Throws (through the FAISS_THROW_* macros) on read errors, inconsistent
 * sizes, unexpected sub-object types or an unknown fourcc. Objects that
 * were already allocated when a check throws are not freed.
 */
Index *read_index (IOReader *f, int io_flags) {
    Index * idx = nullptr;
    uint32_t h;
    READ1 (h);
    if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
        // flat index, inner product or L2
        IndexFlat *idxf;
        if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
        else idxf = new IndexFlatL2 ();
        read_index_header (idxf, f);
        READVECTOR (idxf->xb);
        // NOTE: idxf is leaked if this check throws
        FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
        idx = idxf;
    } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
        IndexLSH * idxl = new IndexLSH ();
        read_index_header (idxl, f);
        READ1 (idxl->nbits);
        READ1 (idxl->rotate_data);
        READ1 (idxl->train_thresholds);
        READVECTOR (idxl->thresholds);
        READ1 (idxl->bytes_per_vec);
        if (h == fourcc("IxHE")) {
            // NOTE: idxl is leaked if this check throws
            FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
                                    "can only read old format IndexLSH with "
                                    "nbits multiple of 64 (got %d)",
                                    (int) idxl->nbits);
            // the old format value is multiplied by 8 to get bytes
            idxl->bytes_per_vec *= 8;
        }
        {
            RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
                (read_VectorTransform (f));
            FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
            idxl->rrot = *rrot;
            delete rrot;
        }
        READVECTOR (idxl->codes);
        FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
                            idxl->rrot.d_out == idxl->nbits);
        FAISS_THROW_IF_NOT (
            idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
        idx = idxl;
    } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
               h == fourcc ("IxPq")) {
        // IxPQ and IxPo were merged into the same IndexPQ object
        IndexPQ * idxp = new IndexPQ ();
        read_index_header (idxp, f);
        read_ProductQuantizer (&idxp->pq, f);
        READVECTOR (idxp->codes);
        if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
            READ1 (idxp->search_type);
            READ1 (idxp->encode_signs);
            READ1 (idxp->polysemous_ht);
        }
        // Old versions of PQ all had metric_type set to INNER_PRODUCT
        // when they were in fact using L2. Therefore, we force metric type
        // to L2 when the old format is detected
        if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
            idxp->metric_type = METRIC_L2;
        }
        idx = idxp;
    } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
        IndexIVFFlat * ivfl = new IndexIVFFlat ();
        std::vector<std::vector<Index::idx_t> > ids;
        read_ivf_header (ivfl, f, &ids);
        ivfl->code_size = ivfl->d * sizeof(float);
        ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);

        if (h == fourcc ("IvFL")) {
            for (size_t i = 0; i < ivfl->nlist; i++) {
                READVECTOR (ail->codes[i]);
            }
        } else { // old format: vectors stored as floats, copied as bytes
            for (size_t i = 0; i < ivfl->nlist; i++) {
                std::vector<float> vec;
                READVECTOR (vec);
                ail->codes[i].resize(vec.size() * sizeof(float));
                memcpy(ail->codes[i].data(), vec.data(),
                       ail->codes[i].size());
            }
        }
        idx = ivfl;
    } else if (h == fourcc ("IwFd")) {
        IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
        read_ivf_header (ivfl, f);
        ivfl->code_size = ivfl->d * sizeof(float);
        {
            // the instances are stored as a flat array of pairs
            std::vector<Index::idx_t> tab;
            READVECTOR (tab);
            // an odd count would make tab[i + 1] read past the end
            FAISS_THROW_IF_NOT (tab.size() % 2 == 0);
            for (size_t i = 0; i < tab.size(); i += 2) {
                std::pair<Index::idx_t, Index::idx_t>
                    pair (tab[i], tab[i + 1]);
                ivfl->instances.insert (pair);
            }
        }
        read_InvertedLists (ivfl, f, io_flags);
        idx = ivfl;
    } else if (h == fourcc ("IwFl")) {
        IndexIVFFlat * ivfl = new IndexIVFFlat ();
        read_ivf_header (ivfl, f);
        ivfl->code_size = ivfl->d * sizeof(float);
        read_InvertedLists (ivfl, f, io_flags);
        idx = ivfl;
    } else if (h == fourcc ("IxSQ")) {
        IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
        read_index_header (idxs, f);
        read_ScalarQuantizer (&idxs->sq, f);
        READVECTOR (idxs->codes);
        idxs->code_size = idxs->sq.code_size;
        idx = idxs;
    } else if (h == fourcc ("IxLa")) {
        // the constructor arguments are stored ahead of the index header
        int d, nsq, scale_nbit, r2;
        READ1 (d);
        READ1 (nsq);
        READ1 (scale_nbit);
        READ1 (r2);
        IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2);
        read_index_header (idxl, f);
        READVECTOR (idxl->trained);
        idx = idxl;
    } else if(h == fourcc ("IvSQ")) { // legacy
        IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
        std::vector<std::vector<Index::idx_t> > ids;
        read_ivf_header (ivsc, f, &ids);
        read_ScalarQuantizer (&ivsc->sq, f);
        READ1 (ivsc->code_size);
        ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
        for (size_t i = 0; i < ivsc->nlist; i++) {
            READVECTOR (ail->codes[i]);
        }
        idx = ivsc;
    } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
        IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
        read_ivf_header (ivsc, f);
        read_ScalarQuantizer (&ivsc->sq, f);
        READ1 (ivsc->code_size);
        if (h == fourcc ("IwSQ")) {
            // by_residual is not stored in this variant
            ivsc->by_residual = true;
        } else {
            READ1 (ivsc->by_residual);
        }
        read_InvertedLists (ivsc, f, io_flags);
        idx = ivsc;
    } else if(h == fourcc ("IwSh")) {
        IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
        read_ivf_header (ivsp, f);
        ivsp->vt = read_VectorTransform (f);
        ivsp->own_fields = true;
        READ1 (ivsp->nbit);
        // not stored by write_ivf_header
        ivsp->code_size = (ivsp->nbit + 7) / 8;
        READ1 (ivsp->period);
        READ1 (ivsp->threshold_type);
        READVECTOR (ivsp->trained);
        read_InvertedLists (ivsp, f, io_flags);
        idx = ivsp;
    } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
              h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {

        idx = read_ivfpq (f, h, io_flags);

    } else if(h == fourcc ("IxPT")) {
        IndexPreTransform * ixpt = new IndexPreTransform();
        ixpt->own_fields = true;
        read_index_header (ixpt, f);
        int nt;
        if (read_old_fmt_hack == 2) {
            // the transform count is not read in this mode
            nt = 1;
        } else {
            READ1 (nt);
        }
        for (int i = 0; i < nt; i++) {
            ixpt->chain.push_back (read_VectorTransform (f));
        }
        ixpt->index = read_index (f, io_flags);
        idx = ixpt;
    } else if(h == fourcc ("Imiq")) {
        MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
        read_index_header (imiq, f);
        read_ProductQuantizer (&imiq->pq, f);
        idx = imiq;
    } else if(h == fourcc ("IxRF")) {
        IndexRefineFlat *idxrf = new IndexRefineFlat ();
        read_index_header (idxrf, f);
        idxrf->base_index = read_index(f, io_flags);
        idxrf->own_fields = true;
        IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f, io_flags));
        // the cast yields nullptr when the stored index is not an IndexFlat
        FAISS_THROW_IF_NOT_MSG (rf, "expected an IndexFlat as refine index");
        std::swap (*rf, idxrf->refine_index);
        delete rf;
        READ1 (idxrf->k_factor);
        idx = idxrf;
    } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
        bool is_map2 = h == fourcc ("IxM2");
        IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
        read_index_header (idxmap, f);
        idxmap->index = read_index (f, io_flags);
        idxmap->own_fields = true;
        READVECTOR (idxmap->id_map);
        if (is_map2) {
            static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
        }
        idx = idxmap;
    } else if (h == fourcc ("Ix2L")) {
        Index2Layer * idxp = new Index2Layer ();
        read_index_header (idxp, f);
        idxp->q1.quantizer = read_index (f, io_flags);
        READ1 (idxp->q1.nlist);
        READ1 (idxp->q1.quantizer_trains_alone);
        read_ProductQuantizer (&idxp->pq, f);
        READ1 (idxp->code_size_1);
        READ1 (idxp->code_size_2);
        READ1 (idxp->code_size);
        READVECTOR (idxp->codes);
        idx = idxp;
    } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
              h == fourcc("IHNs") || h == fourcc("IHN2")) {
        IndexHNSW *idxhnsw = nullptr;
        if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
        if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
        if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
        if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
        read_index_header (idxhnsw, f);
        read_HNSW (&idxhnsw->hnsw, f);
        idxhnsw->storage = read_index (f, io_flags);
        idxhnsw->own_fields = true;
        if (h == fourcc("IHNp")) {
            // the cast yields nullptr when the storage is not an IndexPQ
            IndexPQ *storage_pq = dynamic_cast<IndexPQ*>(idxhnsw->storage);
            FAISS_THROW_IF_NOT_MSG (
                storage_pq, "expected an IndexPQ as IndexHNSWPQ storage");
            storage_pq->pq.compute_sdc_table ();
        }
        idx = idxhnsw;
    } else {
        FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
    }
    return idx;
}
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
/**
 * Read an Index from an already opened C stream.
 *
 * @param f         stream wrapped in a FileIOReader for the read
 * @param io_flags  forwarded to the IOReader overload of read_index
 * @return          result of the IOReader overload
 */
Index *read_index (FILE * f, int io_flags) {
    FileIOReader reader(f);
    Index *idx = read_index(&reader, io_flags);
    return idx;
}
|
|
676
|
+
|
|
677
|
+
/**
 * Read an Index from the file named fname.
 *
 * @param fname     path handed to FileIOReader
 * @param io_flags  forwarded to the IOReader overload of read_index
 * @return          result of the IOReader overload
 */
Index *read_index (const char *fname, int io_flags) {
    FileIOReader reader(fname);
    return read_index (&reader, io_flags);
}
|
|
682
|
+
|
|
683
|
+
/**
 * Read a VectorTransform from the file named fname.
 *
 * @param fname  path handed to FileIOReader
 * @return       result of the IOReader overload of read_VectorTransform
 */
VectorTransform *read_VectorTransform (const char *fname) {
    FileIOReader reader(fname);
    return read_VectorTransform (&reader);
}
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
/*************************************************************
|
|
692
|
+
* Read binary indexes
|
|
693
|
+
**************************************************************/
|
|
694
|
+
|
|
695
|
+
/**
 * Read inverted lists from f and attach them to an IndexBinaryIVF.
 *
 * The lists may be absent (nullptr); when present their nlist and
 * code_size must match the index. The index takes ownership.
 *
 * @param ivf       binary index that receives the lists
 * @param f         reader positioned on the inverted lists
 * @param io_flags  forwarded to read_InvertedLists (f, io_flags)
 */
static void read_InvertedLists (
        IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
    InvertedLists *ils = read_InvertedLists (f, io_flags);
    // do not leak the freshly read lists if the consistency check throws
    ScopeDeleter1<InvertedLists> del (ils);
    FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
                                 ils->code_size == ivf->code_size));
    del.release ();
    ivf->invlists = ils;
    ivf->own_invlists = true;
}
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
/**
 * Read the fields shared by all binary indexes.
 *
 * Reads d, code_size, ntotal, is_trained and metric_type in file order;
 * verbose is not stored and is reset to false.
 *
 * @param idx  index to fill
 * @param f    reader positioned on the header
 */
static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
    IndexBinary &header = *idx;

    READ1 (header.d);
    READ1 (header.code_size);
    READ1 (header.ntotal);
    READ1 (header.is_trained);
    READ1 (header.metric_type);

    header.verbose = false;
}
|
|
714
|
+
|
|
715
|
+
/**
 * Read the part of an IndexBinaryIVF that precedes its inverted lists.
 *
 * Reads the binary index header, nlist, nprobe and the coarse quantizer
 * (which the index then owns), optionally the per-list id vectors of the
 * legacy formats, and finally the direct map fields.
 *
 * @param ivf  index to fill
 * @param f    reader positioned on the IVF header
 * @param ids  when non-null, resized to nlist and filled with one id
 *             vector per list (legacy "Iv" formats)
 */
static void read_binary_ivf_header (
    IndexBinaryIVF *ivf, IOReader *f,
    std::vector<std::vector<Index::idx_t> > *ids = nullptr)
{
    read_index_binary_header (ivf, f);
    READ1 (ivf->nlist);
    READ1 (ivf->nprobe);

    ivf->quantizer = read_index_binary (f);
    ivf->own_fields = true;

    if (ids != nullptr) { // used in legacy "Iv" formats
        ids->resize (ivf->nlist);
        for (auto &list_ids : *ids) {
            READVECTOR (list_ids);
        }
    }

    READ1 (ivf->maintain_direct_map);
    READVECTOR (ivf->direct_map);
}
|
|
732
|
+
|
|
733
|
+
/**
 * Read a serialized IndexBinary from f.
 *
 * A leading fourcc selects the concrete type; each case below allocates
 * the object, reads its fields in file order and returns it.
 *
 * @param f         reader positioned on the fourcc
 * @param io_flags  forwarded to the inverted-list and sub-index readers
 * @return          newly allocated index, owned by the caller
 *
 * Throws (through the FAISS_THROW_* macros) on read errors, a size
 * mismatch or an unknown fourcc.
 */
IndexBinary *read_index_binary (IOReader *f, int io_flags) {
    uint32_t h;
    READ1 (h);

    if (h == fourcc ("IBxF")) {
        IndexBinaryFlat *idxf = new IndexBinaryFlat ();
        read_index_binary_header (idxf, f);
        READVECTOR (idxf->xb);
        // NOTE: idxf is leaked if this check throws
        FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
        return idxf;
    }

    if (h == fourcc ("IBwF")) {
        IndexBinaryIVF *ivf = new IndexBinaryIVF ();
        read_binary_ivf_header (ivf, f);
        read_InvertedLists (ivf, f, io_flags);
        return ivf;
    }

    if (h == fourcc ("IBFf")) {
        // binary index wrapping a float index
        IndexBinaryFromFloat *wrapper = new IndexBinaryFromFloat ();
        read_index_binary_header (wrapper, f);
        wrapper->own_fields = true;
        wrapper->index = read_index (f, io_flags);
        return wrapper;
    }

    if (h == fourcc ("IBHf")) {
        IndexBinaryHNSW *hnsw_index = new IndexBinaryHNSW ();
        read_index_binary_header (hnsw_index, f);
        read_HNSW (&hnsw_index->hnsw, f);
        hnsw_index->storage = read_index_binary (f, io_flags);
        hnsw_index->own_fields = true;
        return hnsw_index;
    }

    if (h == fourcc ("IBMp") || h == fourcc ("IBM2")) {
        const bool is_map2 = h == fourcc ("IBM2");
        IndexBinaryIDMap *id_index = nullptr;
        if (is_map2) {
            id_index = new IndexBinaryIDMap2 ();
        } else {
            id_index = new IndexBinaryIDMap ();
        }
        read_index_binary_header (id_index, f);
        id_index->index = read_index_binary (f, io_flags);
        id_index->own_fields = true;
        READVECTOR (id_index->id_map);
        if (is_map2) {
            static_cast<IndexBinaryIDMap2*>(id_index)->construct_rev_map ();
        }
        return id_index;
    }

    FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
    return nullptr;
}
|
|
780
|
+
|
|
781
|
+
/**
 * Read an IndexBinary from an already opened C stream.
 *
 * @param f         stream wrapped in a FileIOReader for the read
 * @param io_flags  forwarded to the IOReader overload
 * @return          result of the IOReader overload
 */
IndexBinary *read_index_binary (FILE * f, int io_flags) {
    FileIOReader reader(f);
    IndexBinary *idx = read_index_binary(&reader, io_flags);
    return idx;
}
|
|
785
|
+
|
|
786
|
+
/**
 * Read an IndexBinary from the file named fname.
 *
 * @param fname     path handed to FileIOReader
 * @param io_flags  forwarded to the IOReader overload
 * @return          result of the IOReader overload
 */
IndexBinary *read_index_binary (const char *fname, int io_flags) {
    FileIOReader reader(fname);
    return read_index_binary (&reader, io_flags);
}
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
} // namespace faiss
|