faiss 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +103 -3
- data/ext/faiss/ext.cpp +99 -32
- data/ext/faiss/extconf.rb +12 -2
- data/lib/faiss/ext.bundle +0 -0
- data/lib/faiss/index.rb +3 -3
- data/lib/faiss/index_binary.rb +3 -3
- data/lib/faiss/kmeans.rb +1 -1
- data/lib/faiss/pca_matrix.rb +2 -2
- data/lib/faiss/product_quantizer.rb +3 -3
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/AutoTune.cpp +719 -0
- data/vendor/faiss/AutoTune.h +212 -0
- data/vendor/faiss/Clustering.cpp +261 -0
- data/vendor/faiss/Clustering.h +101 -0
- data/vendor/faiss/IVFlib.cpp +339 -0
- data/vendor/faiss/IVFlib.h +132 -0
- data/vendor/faiss/Index.cpp +171 -0
- data/vendor/faiss/Index.h +261 -0
- data/vendor/faiss/Index2Layer.cpp +437 -0
- data/vendor/faiss/Index2Layer.h +85 -0
- data/vendor/faiss/IndexBinary.cpp +77 -0
- data/vendor/faiss/IndexBinary.h +163 -0
- data/vendor/faiss/IndexBinaryFlat.cpp +83 -0
- data/vendor/faiss/IndexBinaryFlat.h +54 -0
- data/vendor/faiss/IndexBinaryFromFloat.cpp +78 -0
- data/vendor/faiss/IndexBinaryFromFloat.h +52 -0
- data/vendor/faiss/IndexBinaryHNSW.cpp +325 -0
- data/vendor/faiss/IndexBinaryHNSW.h +56 -0
- data/vendor/faiss/IndexBinaryIVF.cpp +671 -0
- data/vendor/faiss/IndexBinaryIVF.h +211 -0
- data/vendor/faiss/IndexFlat.cpp +508 -0
- data/vendor/faiss/IndexFlat.h +175 -0
- data/vendor/faiss/IndexHNSW.cpp +1090 -0
- data/vendor/faiss/IndexHNSW.h +170 -0
- data/vendor/faiss/IndexIVF.cpp +909 -0
- data/vendor/faiss/IndexIVF.h +353 -0
- data/vendor/faiss/IndexIVFFlat.cpp +502 -0
- data/vendor/faiss/IndexIVFFlat.h +118 -0
- data/vendor/faiss/IndexIVFPQ.cpp +1207 -0
- data/vendor/faiss/IndexIVFPQ.h +161 -0
- data/vendor/faiss/IndexIVFPQR.cpp +219 -0
- data/vendor/faiss/IndexIVFPQR.h +65 -0
- data/vendor/faiss/IndexIVFSpectralHash.cpp +331 -0
- data/vendor/faiss/IndexIVFSpectralHash.h +75 -0
- data/vendor/faiss/IndexLSH.cpp +225 -0
- data/vendor/faiss/IndexLSH.h +87 -0
- data/vendor/faiss/IndexLattice.cpp +143 -0
- data/vendor/faiss/IndexLattice.h +68 -0
- data/vendor/faiss/IndexPQ.cpp +1188 -0
- data/vendor/faiss/IndexPQ.h +199 -0
- data/vendor/faiss/IndexPreTransform.cpp +288 -0
- data/vendor/faiss/IndexPreTransform.h +91 -0
- data/vendor/faiss/IndexReplicas.cpp +123 -0
- data/vendor/faiss/IndexReplicas.h +76 -0
- data/vendor/faiss/IndexScalarQuantizer.cpp +317 -0
- data/vendor/faiss/IndexScalarQuantizer.h +127 -0
- data/vendor/faiss/IndexShards.cpp +317 -0
- data/vendor/faiss/IndexShards.h +100 -0
- data/vendor/faiss/InvertedLists.cpp +623 -0
- data/vendor/faiss/InvertedLists.h +334 -0
- data/vendor/faiss/LICENSE +21 -0
- data/vendor/faiss/MatrixStats.cpp +252 -0
- data/vendor/faiss/MatrixStats.h +62 -0
- data/vendor/faiss/MetaIndexes.cpp +351 -0
- data/vendor/faiss/MetaIndexes.h +126 -0
- data/vendor/faiss/OnDiskInvertedLists.cpp +674 -0
- data/vendor/faiss/OnDiskInvertedLists.h +127 -0
- data/vendor/faiss/VectorTransform.cpp +1157 -0
- data/vendor/faiss/VectorTransform.h +322 -0
- data/vendor/faiss/c_api/AutoTune_c.cpp +83 -0
- data/vendor/faiss/c_api/AutoTune_c.h +64 -0
- data/vendor/faiss/c_api/Clustering_c.cpp +139 -0
- data/vendor/faiss/c_api/Clustering_c.h +117 -0
- data/vendor/faiss/c_api/IndexFlat_c.cpp +140 -0
- data/vendor/faiss/c_api/IndexFlat_c.h +115 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +64 -0
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +58 -0
- data/vendor/faiss/c_api/IndexIVF_c.cpp +92 -0
- data/vendor/faiss/c_api/IndexIVF_c.h +135 -0
- data/vendor/faiss/c_api/IndexLSH_c.cpp +37 -0
- data/vendor/faiss/c_api/IndexLSH_c.h +40 -0
- data/vendor/faiss/c_api/IndexShards_c.cpp +44 -0
- data/vendor/faiss/c_api/IndexShards_c.h +42 -0
- data/vendor/faiss/c_api/Index_c.cpp +105 -0
- data/vendor/faiss/c_api/Index_c.h +183 -0
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +49 -0
- data/vendor/faiss/c_api/MetaIndexes_c.h +49 -0
- data/vendor/faiss/c_api/clone_index_c.cpp +23 -0
- data/vendor/faiss/c_api/clone_index_c.h +32 -0
- data/vendor/faiss/c_api/error_c.h +42 -0
- data/vendor/faiss/c_api/error_impl.cpp +27 -0
- data/vendor/faiss/c_api/error_impl.h +16 -0
- data/vendor/faiss/c_api/faiss_c.h +58 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +96 -0
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +56 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +52 -0
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +68 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +17 -0
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +30 -0
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +38 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +86 -0
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +66 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +54 -0
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +53 -0
- data/vendor/faiss/c_api/gpu/macros_impl.h +42 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +220 -0
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +149 -0
- data/vendor/faiss/c_api/index_factory_c.cpp +26 -0
- data/vendor/faiss/c_api/index_factory_c.h +30 -0
- data/vendor/faiss/c_api/index_io_c.cpp +42 -0
- data/vendor/faiss/c_api/index_io_c.h +50 -0
- data/vendor/faiss/c_api/macros_impl.h +110 -0
- data/vendor/faiss/clone_index.cpp +147 -0
- data/vendor/faiss/clone_index.h +38 -0
- data/vendor/faiss/demos/demo_imi_flat.cpp +151 -0
- data/vendor/faiss/demos/demo_imi_pq.cpp +199 -0
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +146 -0
- data/vendor/faiss/demos/demo_sift1M.cpp +252 -0
- data/vendor/faiss/gpu/GpuAutoTune.cpp +95 -0
- data/vendor/faiss/gpu/GpuAutoTune.h +27 -0
- data/vendor/faiss/gpu/GpuCloner.cpp +403 -0
- data/vendor/faiss/gpu/GpuCloner.h +82 -0
- data/vendor/faiss/gpu/GpuClonerOptions.cpp +28 -0
- data/vendor/faiss/gpu/GpuClonerOptions.h +53 -0
- data/vendor/faiss/gpu/GpuDistance.h +52 -0
- data/vendor/faiss/gpu/GpuFaissAssert.h +29 -0
- data/vendor/faiss/gpu/GpuIndex.h +148 -0
- data/vendor/faiss/gpu/GpuIndexBinaryFlat.h +89 -0
- data/vendor/faiss/gpu/GpuIndexFlat.h +190 -0
- data/vendor/faiss/gpu/GpuIndexIVF.h +89 -0
- data/vendor/faiss/gpu/GpuIndexIVFFlat.h +85 -0
- data/vendor/faiss/gpu/GpuIndexIVFPQ.h +143 -0
- data/vendor/faiss/gpu/GpuIndexIVFScalarQuantizer.h +100 -0
- data/vendor/faiss/gpu/GpuIndicesOptions.h +30 -0
- data/vendor/faiss/gpu/GpuResources.cpp +52 -0
- data/vendor/faiss/gpu/GpuResources.h +73 -0
- data/vendor/faiss/gpu/StandardGpuResources.cpp +295 -0
- data/vendor/faiss/gpu/StandardGpuResources.h +114 -0
- data/vendor/faiss/gpu/impl/RemapIndices.cpp +43 -0
- data/vendor/faiss/gpu/impl/RemapIndices.h +24 -0
- data/vendor/faiss/gpu/perf/IndexWrapper-inl.h +71 -0
- data/vendor/faiss/gpu/perf/IndexWrapper.h +39 -0
- data/vendor/faiss/gpu/perf/PerfClustering.cpp +115 -0
- data/vendor/faiss/gpu/perf/PerfIVFPQAdd.cpp +139 -0
- data/vendor/faiss/gpu/perf/WriteIndex.cpp +102 -0
- data/vendor/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +130 -0
- data/vendor/faiss/gpu/test/TestGpuIndexFlat.cpp +371 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +550 -0
- data/vendor/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +450 -0
- data/vendor/faiss/gpu/test/TestGpuMemoryException.cpp +84 -0
- data/vendor/faiss/gpu/test/TestUtils.cpp +315 -0
- data/vendor/faiss/gpu/test/TestUtils.h +93 -0
- data/vendor/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +159 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.cpp +77 -0
- data/vendor/faiss/gpu/utils/DeviceMemory.h +71 -0
- data/vendor/faiss/gpu/utils/DeviceUtils.h +185 -0
- data/vendor/faiss/gpu/utils/MemorySpace.cpp +89 -0
- data/vendor/faiss/gpu/utils/MemorySpace.h +44 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.cpp +239 -0
- data/vendor/faiss/gpu/utils/StackDeviceMemory.h +129 -0
- data/vendor/faiss/gpu/utils/StaticUtils.h +83 -0
- data/vendor/faiss/gpu/utils/Timer.cpp +60 -0
- data/vendor/faiss/gpu/utils/Timer.h +52 -0
- data/vendor/faiss/impl/AuxIndexStructures.cpp +305 -0
- data/vendor/faiss/impl/AuxIndexStructures.h +246 -0
- data/vendor/faiss/impl/FaissAssert.h +95 -0
- data/vendor/faiss/impl/FaissException.cpp +66 -0
- data/vendor/faiss/impl/FaissException.h +71 -0
- data/vendor/faiss/impl/HNSW.cpp +818 -0
- data/vendor/faiss/impl/HNSW.h +275 -0
- data/vendor/faiss/impl/PolysemousTraining.cpp +953 -0
- data/vendor/faiss/impl/PolysemousTraining.h +158 -0
- data/vendor/faiss/impl/ProductQuantizer.cpp +876 -0
- data/vendor/faiss/impl/ProductQuantizer.h +242 -0
- data/vendor/faiss/impl/ScalarQuantizer.cpp +1628 -0
- data/vendor/faiss/impl/ScalarQuantizer.h +120 -0
- data/vendor/faiss/impl/ThreadedIndex-inl.h +192 -0
- data/vendor/faiss/impl/ThreadedIndex.h +80 -0
- data/vendor/faiss/impl/index_read.cpp +793 -0
- data/vendor/faiss/impl/index_write.cpp +558 -0
- data/vendor/faiss/impl/io.cpp +142 -0
- data/vendor/faiss/impl/io.h +98 -0
- data/vendor/faiss/impl/lattice_Zn.cpp +712 -0
- data/vendor/faiss/impl/lattice_Zn.h +199 -0
- data/vendor/faiss/index_factory.cpp +392 -0
- data/vendor/faiss/index_factory.h +25 -0
- data/vendor/faiss/index_io.h +75 -0
- data/vendor/faiss/misc/test_blas.cpp +84 -0
- data/vendor/faiss/tests/test_binary_flat.cpp +64 -0
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +183 -0
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +67 -0
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +98 -0
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +566 -0
- data/vendor/faiss/tests/test_merge.cpp +258 -0
- data/vendor/faiss/tests/test_omp_threads.cpp +14 -0
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +220 -0
- data/vendor/faiss/tests/test_pairs_decoding.cpp +189 -0
- data/vendor/faiss/tests/test_params_override.cpp +231 -0
- data/vendor/faiss/tests/test_pq_encoding.cpp +98 -0
- data/vendor/faiss/tests/test_sliding_ivf.cpp +240 -0
- data/vendor/faiss/tests/test_threaded_index.cpp +253 -0
- data/vendor/faiss/tests/test_transfer_invlists.cpp +159 -0
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +98 -0
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +81 -0
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +93 -0
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +119 -0
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +99 -0
- data/vendor/faiss/utils/Heap.cpp +122 -0
- data/vendor/faiss/utils/Heap.h +495 -0
- data/vendor/faiss/utils/WorkerThread.cpp +126 -0
- data/vendor/faiss/utils/WorkerThread.h +61 -0
- data/vendor/faiss/utils/distances.cpp +765 -0
- data/vendor/faiss/utils/distances.h +243 -0
- data/vendor/faiss/utils/distances_simd.cpp +809 -0
- data/vendor/faiss/utils/extra_distances.cpp +336 -0
- data/vendor/faiss/utils/extra_distances.h +54 -0
- data/vendor/faiss/utils/hamming-inl.h +472 -0
- data/vendor/faiss/utils/hamming.cpp +792 -0
- data/vendor/faiss/utils/hamming.h +220 -0
- data/vendor/faiss/utils/random.cpp +192 -0
- data/vendor/faiss/utils/random.h +60 -0
- data/vendor/faiss/utils/utils.cpp +783 -0
- data/vendor/faiss/utils/utils.h +181 -0
- metadata +216 -2
@@ -0,0 +1,120 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#pragma once
|
11
|
+
|
12
|
+
#include <faiss/IndexIVF.h>
|
13
|
+
#include <faiss/impl/AuxIndexStructures.h>
|
14
|
+
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
/**
|
19
|
+
* The uniform quantizer has a range [vmin, vmax]. The range can be
|
20
|
+
* the same for all dimensions (uniform) or specific per dimension
|
21
|
+
* (default).
|
22
|
+
*/
|
23
|
+
|
24
|
+
struct ScalarQuantizer {
|
25
|
+
|
26
|
+
enum QuantizerType {
|
27
|
+
QT_8bit, ///< 8 bits per component
|
28
|
+
QT_4bit, ///< 4 bits per component
|
29
|
+
QT_8bit_uniform, ///< same, shared range for all dimensions
|
30
|
+
QT_4bit_uniform,
|
31
|
+
QT_fp16,
|
32
|
+
QT_8bit_direct, /// fast indexing of uint8s
|
33
|
+
QT_6bit, ///< 6 bits per component
|
34
|
+
};
|
35
|
+
|
36
|
+
QuantizerType qtype;
|
37
|
+
|
38
|
+
/** The uniform encoder can estimate the range of representable
|
39
|
+
* values of the unform encoder using different statistics. Here
|
40
|
+
* rs = rangestat_arg */
|
41
|
+
|
42
|
+
// rangestat_arg.
|
43
|
+
enum RangeStat {
|
44
|
+
RS_minmax, ///< [min - rs*(max-min), max + rs*(max-min)]
|
45
|
+
RS_meanstd, ///< [mean - std * rs, mean + std * rs]
|
46
|
+
RS_quantiles, ///< [Q(rs), Q(1-rs)]
|
47
|
+
RS_optim, ///< alternate optimization of reconstruction error
|
48
|
+
};
|
49
|
+
|
50
|
+
RangeStat rangestat;
|
51
|
+
float rangestat_arg;
|
52
|
+
|
53
|
+
/// dimension of input vectors
|
54
|
+
size_t d;
|
55
|
+
|
56
|
+
/// bytes per vector
|
57
|
+
size_t code_size;
|
58
|
+
|
59
|
+
/// trained values (including the range)
|
60
|
+
std::vector<float> trained;
|
61
|
+
|
62
|
+
ScalarQuantizer (size_t d, QuantizerType qtype);
|
63
|
+
ScalarQuantizer ();
|
64
|
+
|
65
|
+
void train (size_t n, const float *x);
|
66
|
+
|
67
|
+
/// Used by an IVF index to train based on the residuals
|
68
|
+
void train_residual (size_t n,
|
69
|
+
const float *x,
|
70
|
+
Index *quantizer,
|
71
|
+
bool by_residual,
|
72
|
+
bool verbose);
|
73
|
+
|
74
|
+
/// same as compute_code for several vectors
|
75
|
+
void compute_codes (const float * x,
|
76
|
+
uint8_t * codes,
|
77
|
+
size_t n) const ;
|
78
|
+
|
79
|
+
/// decode a vector from a given code (or n vectors if third argument)
|
80
|
+
void decode (const uint8_t *code, float *x, size_t n) const;
|
81
|
+
|
82
|
+
|
83
|
+
/*****************************************************
|
84
|
+
* Objects that provide methods for encoding/decoding, distance
|
85
|
+
* computation and inverted list scanning
|
86
|
+
*****************************************************/
|
87
|
+
|
88
|
+
struct Quantizer {
|
89
|
+
// encodes one vector. Assumes code is filled with 0s on input!
|
90
|
+
virtual void encode_vector(const float *x, uint8_t *code) const = 0;
|
91
|
+
virtual void decode_vector(const uint8_t *code, float *x) const = 0;
|
92
|
+
|
93
|
+
virtual ~Quantizer() {}
|
94
|
+
};
|
95
|
+
|
96
|
+
Quantizer * select_quantizer() const;
|
97
|
+
|
98
|
+
struct SQDistanceComputer: DistanceComputer {
|
99
|
+
|
100
|
+
const float *q;
|
101
|
+
const uint8_t *codes;
|
102
|
+
size_t code_size;
|
103
|
+
|
104
|
+
SQDistanceComputer (): q(nullptr), codes (nullptr), code_size (0)
|
105
|
+
{}
|
106
|
+
|
107
|
+
};
|
108
|
+
|
109
|
+
SQDistanceComputer *get_distance_computer (MetricType metric = METRIC_L2)
|
110
|
+
const;
|
111
|
+
|
112
|
+
InvertedListScanner *select_InvertedListScanner
|
113
|
+
(MetricType mt, const Index *quantizer, bool store_pairs,
|
114
|
+
bool by_residual=false) const;
|
115
|
+
|
116
|
+
};
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
} // namespace faiss
|
@@ -0,0 +1,192 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <faiss/impl/FaissAssert.h>
|
9
|
+
#include <exception>
|
10
|
+
#include <iostream>
|
11
|
+
|
12
|
+
namespace faiss {
|
13
|
+
|
14
|
+
template <typename IndexT>
|
15
|
+
ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
|
16
|
+
// 0 is default dimension
|
17
|
+
: ThreadedIndex(0, threaded) {
|
18
|
+
}
|
19
|
+
|
20
|
+
template <typename IndexT>
|
21
|
+
ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
|
22
|
+
: IndexT(d),
|
23
|
+
own_fields(false),
|
24
|
+
isThreaded_(threaded) {
|
25
|
+
}
|
26
|
+
|
27
|
+
template <typename IndexT>
|
28
|
+
ThreadedIndex<IndexT>::~ThreadedIndex() {
|
29
|
+
for (auto& p : indices_) {
|
30
|
+
if (isThreaded_) {
|
31
|
+
// should have worker thread
|
32
|
+
FAISS_ASSERT((bool) p.second);
|
33
|
+
|
34
|
+
// This will also flush all pending work
|
35
|
+
p.second->stop();
|
36
|
+
p.second->waitForThreadExit();
|
37
|
+
} else {
|
38
|
+
// should not have worker thread
|
39
|
+
FAISS_ASSERT(!(bool) p.second);
|
40
|
+
}
|
41
|
+
|
42
|
+
if (own_fields) {
|
43
|
+
delete p.first;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
template <typename IndexT>
|
49
|
+
void ThreadedIndex<IndexT>::addIndex(IndexT* index) {
|
50
|
+
// We inherit the dimension from the first index added to us if we don't have
|
51
|
+
// a set dimension
|
52
|
+
if (indices_.empty() && this->d == 0) {
|
53
|
+
this->d = index->d;
|
54
|
+
}
|
55
|
+
|
56
|
+
// The new index must match our set dimension
|
57
|
+
FAISS_THROW_IF_NOT_FMT(this->d == index->d,
|
58
|
+
"addIndex: dimension mismatch for "
|
59
|
+
"newly added index; expecting dim %d, "
|
60
|
+
"new index has dim %d",
|
61
|
+
this->d, index->d);
|
62
|
+
|
63
|
+
if (!indices_.empty()) {
|
64
|
+
auto& existing = indices_.front().first;
|
65
|
+
|
66
|
+
FAISS_THROW_IF_NOT_MSG(index->metric_type == existing->metric_type,
|
67
|
+
"addIndex: newly added index is "
|
68
|
+
"of different metric type than old index");
|
69
|
+
|
70
|
+
// Make sure this index is not duplicated
|
71
|
+
for (auto& p : indices_) {
|
72
|
+
FAISS_THROW_IF_NOT_MSG(p.first != index,
|
73
|
+
"addIndex: attempting to add index "
|
74
|
+
"that is already in the collection");
|
75
|
+
}
|
76
|
+
}
|
77
|
+
|
78
|
+
indices_.emplace_back(
|
79
|
+
std::make_pair(
|
80
|
+
index,
|
81
|
+
std::unique_ptr<WorkerThread>(isThreaded_ ?
|
82
|
+
new WorkerThread : nullptr)));
|
83
|
+
|
84
|
+
onAfterAddIndex(index);
|
85
|
+
}
|
86
|
+
|
87
|
+
template <typename IndexT>
|
88
|
+
void ThreadedIndex<IndexT>::removeIndex(IndexT* index) {
|
89
|
+
for (auto it = indices_.begin(); it != indices_.end(); ++it) {
|
90
|
+
if (it->first == index) {
|
91
|
+
// This is our index; stop the worker thread before removing it,
|
92
|
+
// to ensure that it has finished before function exit
|
93
|
+
if (isThreaded_) {
|
94
|
+
// should have worker thread
|
95
|
+
FAISS_ASSERT((bool) it->second);
|
96
|
+
it->second->stop();
|
97
|
+
it->second->waitForThreadExit();
|
98
|
+
} else {
|
99
|
+
// should not have worker thread
|
100
|
+
FAISS_ASSERT(!(bool) it->second);
|
101
|
+
}
|
102
|
+
|
103
|
+
indices_.erase(it);
|
104
|
+
onAfterRemoveIndex(index);
|
105
|
+
|
106
|
+
if (own_fields) {
|
107
|
+
delete index;
|
108
|
+
}
|
109
|
+
|
110
|
+
return;
|
111
|
+
}
|
112
|
+
}
|
113
|
+
|
114
|
+
// could not find our index
|
115
|
+
FAISS_THROW_MSG("IndexReplicas::removeIndex: index not found");
|
116
|
+
}
|
117
|
+
|
118
|
+
template <typename IndexT>
|
119
|
+
void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
|
120
|
+
if (isThreaded_) {
|
121
|
+
std::vector<std::future<bool>> v;
|
122
|
+
|
123
|
+
for (int i = 0; i < this->indices_.size(); ++i) {
|
124
|
+
auto& p = this->indices_[i];
|
125
|
+
auto indexPtr = p.first;
|
126
|
+
v.emplace_back(p.second->add([f, i, indexPtr](){ f(i, indexPtr); }));
|
127
|
+
}
|
128
|
+
|
129
|
+
waitAndHandleFutures(v);
|
130
|
+
} else {
|
131
|
+
// Multiple exceptions may be thrown; gather them as we encounter them,
|
132
|
+
// while letting everything else run to completion
|
133
|
+
std::vector<std::pair<int, std::exception_ptr>> exceptions;
|
134
|
+
|
135
|
+
for (int i = 0; i < this->indices_.size(); ++i) {
|
136
|
+
auto& p = this->indices_[i];
|
137
|
+
try {
|
138
|
+
f(i, p.first);
|
139
|
+
} catch (...) {
|
140
|
+
exceptions.emplace_back(std::make_pair(i, std::current_exception()));
|
141
|
+
}
|
142
|
+
}
|
143
|
+
|
144
|
+
handleExceptions(exceptions);
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
template <typename IndexT>
|
149
|
+
void ThreadedIndex<IndexT>::runOnIndex(
|
150
|
+
std::function<void(int, const IndexT*)> f) const {
|
151
|
+
const_cast<ThreadedIndex<IndexT>*>(this)->runOnIndex(
|
152
|
+
[f](int i, IndexT* idx){ f(i, idx); });
|
153
|
+
}
|
154
|
+
|
155
|
+
template <typename IndexT>
|
156
|
+
void ThreadedIndex<IndexT>::reset() {
|
157
|
+
runOnIndex([](int, IndexT* index){ index->reset(); });
|
158
|
+
this->ntotal = 0;
|
159
|
+
this->is_trained = false;
|
160
|
+
}
|
161
|
+
|
162
|
+
template <typename IndexT>
|
163
|
+
void
|
164
|
+
ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* index) {
|
165
|
+
}
|
166
|
+
|
167
|
+
template <typename IndexT>
|
168
|
+
void
|
169
|
+
ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* index) {
|
170
|
+
}
|
171
|
+
|
172
|
+
template <typename IndexT>
|
173
|
+
void
|
174
|
+
ThreadedIndex<IndexT>::waitAndHandleFutures(std::vector<std::future<bool>>& v) {
|
175
|
+
// Blocking wait for completion for all of the indices, capturing any
|
176
|
+
// exceptions that are generated
|
177
|
+
std::vector<std::pair<int, std::exception_ptr>> exceptions;
|
178
|
+
|
179
|
+
for (int i = 0; i < v.size(); ++i) {
|
180
|
+
auto& fut = v[i];
|
181
|
+
|
182
|
+
try {
|
183
|
+
fut.get();
|
184
|
+
} catch (...) {
|
185
|
+
exceptions.emplace_back(std::make_pair(i, std::current_exception()));
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
handleExceptions(exceptions);
|
190
|
+
}
|
191
|
+
|
192
|
+
} // namespace
|
@@ -0,0 +1,80 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
#pragma once
|
9
|
+
|
10
|
+
#include <faiss/Index.h>
|
11
|
+
#include <faiss/IndexBinary.h>
|
12
|
+
#include <faiss/utils/WorkerThread.h>
|
13
|
+
#include <memory>
|
14
|
+
#include <vector>
|
15
|
+
|
16
|
+
namespace faiss {
|
17
|
+
|
18
|
+
/// A holder of indices in a collection of threads
|
19
|
+
/// The interface to this class itself is not thread safe
|
20
|
+
template <typename IndexT>
|
21
|
+
class ThreadedIndex : public IndexT {
|
22
|
+
public:
|
23
|
+
explicit ThreadedIndex(bool threaded);
|
24
|
+
explicit ThreadedIndex(int d, bool threaded);
|
25
|
+
|
26
|
+
~ThreadedIndex() override;
|
27
|
+
|
28
|
+
/// override an index that is managed by ourselves.
|
29
|
+
/// WARNING: once an index is added, it becomes unsafe to touch it from any
|
30
|
+
/// other thread than that on which is managing it, until we are shut
|
31
|
+
/// down. Use runOnIndex to perform work on it instead.
|
32
|
+
void addIndex(IndexT* index);
|
33
|
+
|
34
|
+
/// Remove an index that is managed by ourselves.
|
35
|
+
/// This will flush all pending work on that index, and then shut
|
36
|
+
/// down its managing thread, and will remove the index.
|
37
|
+
void removeIndex(IndexT* index);
|
38
|
+
|
39
|
+
/// Run a function on all indices, in the thread that the index is
|
40
|
+
/// managed in.
|
41
|
+
/// Function arguments are (index in collection, index pointer)
|
42
|
+
void runOnIndex(std::function<void(int, IndexT*)> f);
|
43
|
+
void runOnIndex(std::function<void(int, const IndexT*)> f) const;
|
44
|
+
|
45
|
+
/// faiss::Index API
|
46
|
+
/// All indices receive the same call
|
47
|
+
void reset() override;
|
48
|
+
|
49
|
+
/// Returns the number of sub-indices
|
50
|
+
int count() const { return indices_.size(); }
|
51
|
+
|
52
|
+
/// Returns the i-th sub-index
|
53
|
+
IndexT* at(int i) { return indices_[i].first; }
|
54
|
+
|
55
|
+
/// Returns the i-th sub-index (const version)
|
56
|
+
const IndexT* at(int i) const { return indices_[i].first; }
|
57
|
+
|
58
|
+
/// Whether or not we are responsible for deleting our contained indices
|
59
|
+
bool own_fields;
|
60
|
+
|
61
|
+
protected:
|
62
|
+
/// Called just after an index is added
|
63
|
+
virtual void onAfterAddIndex(IndexT* index);
|
64
|
+
|
65
|
+
/// Called just after an index is removed
|
66
|
+
virtual void onAfterRemoveIndex(IndexT* index);
|
67
|
+
|
68
|
+
protected:
|
69
|
+
static void waitAndHandleFutures(std::vector<std::future<bool>>& v);
|
70
|
+
|
71
|
+
/// Collection of Index instances, with their managing worker thread if any
|
72
|
+
std::vector<std::pair<IndexT*, std::unique_ptr<WorkerThread>>> indices_;
|
73
|
+
|
74
|
+
/// Is this index multi-threaded?
|
75
|
+
bool isThreaded_;
|
76
|
+
};
|
77
|
+
|
78
|
+
} // namespace
|
79
|
+
|
80
|
+
#include <faiss/impl/ThreadedIndex-inl.h>
|
@@ -0,0 +1,793 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
8
|
+
// -*- c++ -*-
|
9
|
+
|
10
|
+
#include <faiss/index_io.h>
|
11
|
+
|
12
|
+
#include <cstdio>
|
13
|
+
#include <cstdlib>
|
14
|
+
|
15
|
+
#include <sys/mman.h>
|
16
|
+
#include <sys/types.h>
|
17
|
+
#include <sys/stat.h>
|
18
|
+
#include <unistd.h>
|
19
|
+
|
20
|
+
#include <faiss/impl/FaissAssert.h>
|
21
|
+
#include <faiss/impl/io.h>
|
22
|
+
|
23
|
+
#include <faiss/IndexFlat.h>
|
24
|
+
#include <faiss/VectorTransform.h>
|
25
|
+
#include <faiss/IndexPreTransform.h>
|
26
|
+
#include <faiss/IndexLSH.h>
|
27
|
+
#include <faiss/IndexPQ.h>
|
28
|
+
#include <faiss/IndexIVF.h>
|
29
|
+
#include <faiss/IndexIVFPQ.h>
|
30
|
+
#include <faiss/IndexIVFPQR.h>
|
31
|
+
#include <faiss/Index2Layer.h>
|
32
|
+
#include <faiss/IndexIVFFlat.h>
|
33
|
+
#include <faiss/IndexIVFSpectralHash.h>
|
34
|
+
#include <faiss/MetaIndexes.h>
|
35
|
+
#include <faiss/IndexScalarQuantizer.h>
|
36
|
+
#include <faiss/IndexHNSW.h>
|
37
|
+
#include <faiss/IndexLattice.h>
|
38
|
+
|
39
|
+
#include <faiss/OnDiskInvertedLists.h>
|
40
|
+
#include <faiss/IndexBinaryFlat.h>
|
41
|
+
#include <faiss/IndexBinaryFromFloat.h>
|
42
|
+
#include <faiss/IndexBinaryHNSW.h>
|
43
|
+
#include <faiss/IndexBinaryIVF.h>
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
namespace faiss {
|
48
|
+
|
49
|
+
/*************************************************************
|
50
|
+
* I/O macros
|
51
|
+
*
|
52
|
+
* we use macros so that we have a line number to report in abort
|
53
|
+
* (). This makes debugging a lot easier. The IOReader or IOWriter is
|
54
|
+
* always called f and thus is not passed in as a macro parameter.
|
55
|
+
**************************************************************/
|
56
|
+
|
57
|
+
|
58
|
+
// Read n items of type *ptr from the reader `f` into ptr and throw a
// FaissException if fewer than n items were obtained. The item counts are
// size_t values, hence the %zu conversions in the error message.
// NOTE: `n` is evaluated more than once; pass a side-effect-free expression.
#define READANDCHECK(ptr, n) {                                  \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);              \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                      \
            "read error in %s: %zu != %zu (%s)",                \
            f->name.c_str(), ret, size_t(n), strerror(errno));  \
    }

// Read a single object x in place.
#define READ1(x)  READANDCHECK(&(x), 1)

// Read a vector: first a `long` element count, then that many elements.
// The count is rejected unless it lies in [0, 2^40) before the vector is
// resized, so a corrupt count cannot trigger an arbitrarily large allocation.
// (original note: will fail if we write 256G of data at once...)
#define READVECTOR(vec) {                                       \
        long size;                                              \
        READANDCHECK (&size, 1);                                \
        FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40));    \
        (vec).resize (size);                                    \
        READANDCHECK ((vec).data (), size);                     \
    }
|
75
|
+
|
76
|
+
|
77
|
+
|
78
|
+
/*************************************************************
|
79
|
+
* Read
|
80
|
+
**************************************************************/
|
81
|
+
|
82
|
+
/// Read the fields common to all Index objects from f into idx, in the
/// order: d, ntotal, two idx_t values that are discarded, is_trained,
/// metric_type, and metric_arg (only present when metric_type > 1).
/// verbose is always reset to false.
static void read_index_header (Index *idx, IOReader *f) {
    READ1 (idx->d);
    READ1 (idx->ntotal);

    // two idx_t-sized fields are consumed from the stream and ignored
    Index::idx_t ignored;
    for (int k = 0; k < 2; k++) {
        READ1 (ignored);
    }

    READ1 (idx->is_trained);
    READ1 (idx->metric_type);

    const bool has_metric_arg = idx->metric_type > 1;
    if (has_metric_arg) {
        READ1 (idx->metric_arg);
    }

    idx->verbose = false;
}
|
95
|
+
|
96
|
+
/**
 * Deserialize a VectorTransform from f.
 *
 * A fourcc tag selects the concrete type; the type-specific fields follow,
 * then the common trailer (d_in, d_out, is_trained). The order of reads is
 * the on-disk format and must not be changed.
 *
 * The returned object is heap-allocated and owned by the caller. If any
 * read or consistency check throws, the partially built object is deleted
 * before the exception propagates (the original leaked it).
 *
 * @throws FaissException on read errors, unknown fourcc, or failed checks.
 */
VectorTransform* read_VectorTransform (IOReader *f) {
    uint32_t h;
    READ1 (h);
    VectorTransform *vt = nullptr;

    // vt is assigned as soon as an object exists, so the handler below can
    // release it. NOTE(review): deleting through VectorTransform* presumes a
    // virtual destructor, which callers deleting the returned pointer would
    // need as well -- confirm.
    try {
        if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
            h == fourcc ("LTra") || h == fourcc ("PcAm") ||
            h == fourcc ("Viqm")) {
            // All of these are LinearTransform (sub)classes and share the
            // have_bias / A / b fields read after the type-specific part.
            LinearTransform *lt = nullptr;
            if (h == fourcc ("rrot")) {
                lt = new RandomRotationMatrix ();
                vt = lt;
            } else if (h == fourcc ("PCAm") ||
                       h == fourcc ("PcAm")) {
                PCAMatrix * pca = new PCAMatrix ();
                lt = pca;
                vt = lt;
                READ1 (pca->eigen_power);
                READ1 (pca->random_rotation);
                // only the "PcAm" variant stores balanced_bins
                if (h == fourcc ("PcAm")) {
                    READ1 (pca->balanced_bins);
                }
                READVECTOR (pca->mean);
                READVECTOR (pca->eigenvalues);
                READVECTOR (pca->PCAMat);
            } else if (h == fourcc ("Viqm")) {
                ITQMatrix *itqm = new ITQMatrix ();
                lt = itqm;
                vt = lt;
                READ1 (itqm->max_iter);
                READ1 (itqm->seed);
            } else if (h == fourcc ("LTra")) {
                lt = new LinearTransform ();
                vt = lt;
            }
            READ1 (lt->have_bias);
            READVECTOR (lt->A);
            READVECTOR (lt->b);
            // NOTE(review): d_in / d_out are only read from the stream in
            // the trailer below, so these two checks and
            // set_is_orthonormal() see whatever the constructor left in
            // them -- confirm this is intended.
            FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
            FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
            lt->set_is_orthonormal();
        } else if (h == fourcc ("RmDT")) {
            RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
            vt = rdt;
            READVECTOR (rdt->map);
        } else if (h == fourcc ("VNrm")) {
            NormalizationTransform *nt = new NormalizationTransform ();
            vt = nt;
            READ1 (nt->norm);
        } else if (h == fourcc ("VCnt")) {
            CenteringTransform *ct = new CenteringTransform ();
            vt = ct;
            READVECTOR (ct->mean);
        } else if (h == fourcc ("Viqt")) {
            ITQTransform *itqt = new ITQTransform ();
            vt = itqt;

            READVECTOR (itqt->mean);
            READ1 (itqt->do_pca);
            {
                // nested transform #1: must be an ITQMatrix; it is copied
                // into itqt and the temporary is released
                VectorTransform *sub = read_VectorTransform (f);
                ITQMatrix *itqm = dynamic_cast<ITQMatrix*> (sub);
                if (!itqm) {
                    delete sub;  // wrong type: free it before throwing
                }
                FAISS_THROW_IF_NOT(itqm);
                itqt->itq = *itqm;
                delete itqm;
            }
            {
                // nested transform #2: must be a LinearTransform
                VectorTransform *sub = read_VectorTransform (f);
                LinearTransform *pi = dynamic_cast<LinearTransform*> (sub);
                if (!pi) {
                    delete sub;  // wrong type: free it before throwing
                }
                FAISS_THROW_IF_NOT (pi);
                itqt->pca_then_itq = *pi;
                delete pi;
            }
        } else {
            FAISS_THROW_MSG("fourcc not recognized");
        }

        // trailer common to every transform type
        READ1 (vt->d_in);
        READ1 (vt->d_out);
        READ1 (vt->is_trained);
    } catch (...) {
        delete vt;
        throw;
    }
    return vt;
}
|
173
|
+
|
174
|
+
|
175
|
+
/**
 * Read the per-list sizes of an array inverted list from f into `sizes`.
 *
 * `sizes` must already have one entry per list on entry. Two encodings are
 * accepted, selected by a fourcc tag:
 *  - "full": one size per list; the stored count must equal sizes.size().
 *  - "sprs": a flat vector of (list id, size) pairs; only the listed
 *            entries of `sizes` are overwritten.
 *
 * @throws FaissException on read errors, an unknown tag, a count mismatch,
 *         an out-of-range list id, or a "sprs" vector of odd length.
 */
static void read_ArrayInvertedLists_sizes (
         IOReader *f, std::vector<size_t> & sizes)
{
    uint32_t list_type;
    READ1(list_type);
    if (list_type == fourcc("full")) {
        size_t os = sizes.size();
        READVECTOR (sizes);
        FAISS_THROW_IF_NOT (os == sizes.size());
    } else if (list_type == fourcc("sprs")) {
        std::vector<size_t> idsizes;
        READVECTOR (idsizes);
        // Entries are consumed two at a time; an odd length would make the
        // last idsizes[j + 1] read past the end of the vector.
        FAISS_THROW_IF_NOT_MSG (idsizes.size() % 2 == 0,
                                "sparse list sizes must come in "
                                "(list id, size) pairs");
        for (size_t j = 0; j < idsizes.size(); j += 2) {
            FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
            sizes[idsizes[j]] = idsizes[j + 1];
        }
    } else {
        FAISS_THROW_MSG ("invalid list_type");
    }
}
|
195
|
+
|
196
|
+
/** Read an InvertedLists object from f.
 *
 * The leading fourcc selects what is stored:
 *  - "il00": no inverted lists were stored; a warning is printed on stderr
 *    and nullptr is returned.
 *  - "ilar" without IO_FLAG_MMAP: codes and ids are copied into a new
 *    ArrayInvertedLists.
 *  - "ilar" with IO_FLAG_MMAP: the underlying file is mapped read-only and
 *    wrapped in an OnDiskInvertedLists; f must be a FileIOReader. The file
 *    position is moved past the list payload before returning.
 *  - "ilod": an OnDiskInvertedLists descriptor; with
 *    IO_FLAG_ONDISK_SAME_DIR the stored filename is rebased onto the
 *    directory of the index file being read.
 *
 * @param f         reader to consume from
 * @param io_flags  bitwise OR of IO_FLAG_* values
 * @return          newly allocated object (caller owns it), or nullptr
 *
 * Throws through the FAISS_THROW_* macros on any read or validation
 * failure; the partially built object is deleted in that case.
 */
InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
    uint32_t h;
    READ1 (h);
    if (h == fourcc ("il00")) {
        fprintf(stderr, "read_InvertedLists:"
                " WARN! inverted lists not stored with IVF object\n");
        return nullptr;
    } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_MMAP)) {
        auto ails = new ArrayInvertedLists (0, 0);
        // deletes ails if one of the reads below throws
        ScopeDeleter1<ArrayInvertedLists> del (ails);
        READ1 (ails->nlist);
        READ1 (ails->code_size);
        ails->ids.resize (ails->nlist);
        ails->codes.resize (ails->nlist);
        std::vector<size_t> sizes (ails->nlist);
        read_ArrayInvertedLists_sizes (f, sizes);
        for (size_t i = 0; i < ails->nlist; i++) {
            ails->ids[i].resize (sizes[i]);
            ails->codes[i].resize (sizes[i] * ails->code_size);
        }
        // per list: first the codes, then the ids
        for (size_t i = 0; i < ails->nlist; i++) {
            size_t n = ails->ids[i].size();
            if (n > 0) {
                READANDCHECK (ails->codes[i].data(), n * ails->code_size);
                READANDCHECK (ails->ids[i].data(), n);
            }
        }
        del.release ();
        return ails;
    } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) {
        // then we load it as an OnDiskInvertedLists

        FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
        FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
        FILE *fdesc = reader->f;

        auto ails = new OnDiskInvertedLists ();
        // deletes ails if one of the steps below throws
        ScopeDeleter1<OnDiskInvertedLists> del (ails);
        READ1 (ails->nlist);
        READ1 (ails->code_size);
        ails->read_only = true;
        ails->lists.resize (ails->nlist);
        std::vector<size_t> sizes (ails->nlist);
        read_ArrayInvertedLists_sizes (f, sizes);
        // the list payload starts at the current file position;
        // ftell reports failure with a negative value
        long start = ftell (fdesc);
        FAISS_THROW_IF_NOT_FMT (start >= 0,
                                "ftell failed: %s", strerror(errno));
        size_t o = start;
        { // do the mmap
            struct stat buf;
            int ret = fstat (fileno(fdesc), &buf);
            FAISS_THROW_IF_NOT_FMT (ret == 0,
                                    "fstat failed: %s", strerror(errno));
            ails->totsize = buf.st_size;
            // only store the mapping once it is known to be valid
            void *mapped = mmap (nullptr, ails->totsize,
                                 PROT_READ, MAP_SHARED,
                                 fileno(fdesc), 0);
            FAISS_THROW_IF_NOT_FMT (mapped != MAP_FAILED,
                                    "could not mmap: %s",
                                    strerror(errno));
            ails->ptr = (uint8_t*)mapped;
        }

        // lists are laid out back to back; record each one's offset
        for (size_t i = 0; i < ails->nlist; i++) {
            OnDiskInvertedLists::List & l = ails->lists[i];
            l.size = l.capacity = sizes[i];
            l.offset = o;
            o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
                           ails->code_size);
        }
        FAISS_THROW_IF_NOT(o <= ails->totsize);
        // resume normal reading of file
        int seek_ret = fseek (fdesc, o, SEEK_SET);
        FAISS_THROW_IF_NOT_FMT (seek_ret == 0,
                                "fseek failed: %s", strerror(errno));
        del.release ();
        return ails;
    } else if (h == fourcc ("ilod")) {
        OnDiskInvertedLists *od = new OnDiskInvertedLists();
        // deletes od if one of the steps below throws
        ScopeDeleter1<OnDiskInvertedLists> del (od);
        od->read_only = io_flags & IO_FLAG_READ_ONLY;
        READ1 (od->nlist);
        READ1 (od->code_size);
        // this is a POD object
        READVECTOR (od->lists);
        {
            std::vector<OnDiskInvertedLists::Slot> v;
            READVECTOR(v);
            od->slots.assign(v.begin(), v.end());
        }
        {
            std::vector<char> x;
            READVECTOR(x);
            od->filename.assign(x.begin(), x.end());

            if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
                FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
                FAISS_THROW_IF_NOT_MSG (
                    reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
                    "when reading from file");
                // directory of the index file, "./" when it has none
                std::string indexname = reader->name;
                std::string dirname = "./";
                size_t slash = indexname.find_last_of('/');
                if (slash != std::string::npos) {
                    dirname = indexname.substr(0, slash + 1);
                }
                // basename of the stored on-disk filename
                std::string filename = od->filename;
                slash = filename.find_last_of('/');
                if (slash != std::string::npos) {
                    filename = filename.substr(slash + 1);
                }
                filename = dirname + filename;
                printf("IO_FLAG_ONDISK_SAME_DIR: "
                       "updating ondisk filename from %s to %s\n",
                       od->filename.c_str(), filename.c_str());
                od->filename = filename;
            }

        }
        READ1(od->totsize);
        od->do_mmap();
        del.release ();
        return od;
    } else {
        FAISS_THROW_MSG ("read_InvertedLists: unsupported invlist type");
    }
}
|
311
|
+
|
312
|
+
/** Read inverted lists from f and attach them to ivf.
 *
 * When lists are present, their nlist and code_size must match the ones
 * of ivf; otherwise an exception is thrown and the lists just read are
 * deleted. ivf takes ownership (own_invlists is set), also when the
 * result is nullptr.
 */
static void read_InvertedLists (
        IndexIVF *ivf, IOReader *f, int io_flags) {
    InvertedLists *ils = read_InvertedLists (f, io_flags);
    // deletes ils if the consistency check below throws
    ScopeDeleter1<InvertedLists> del (ils);
    FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
                                 ils->code_size == ivf->code_size));
    del.release ();
    ivf->invlists = ils;
    ivf->own_invlists = true;
}
|
320
|
+
|
321
|
+
/** Fill an existing ProductQuantizer from f.
 *
 * Reads d, M and nbits, calls set_derived_values() on the object, then
 * reads the centroid table.
 */
static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f)
{
    // scalar parameters, in stored order
    READ1 (pq->d);
    READ1 (pq->M);
    READ1 (pq->nbits);

    // must run after the scalars are in place and before the centroids
    pq->set_derived_values ();

    READVECTOR (pq->centroids);
}
|
328
|
+
|
329
|
+
/** Fill an existing ScalarQuantizer from f.
 *
 * Fields are read in stored order: qtype, rangestat, rangestat_arg, d,
 * code_size, then the `trained` vector.
 */
static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f)
{
    // quantizer configuration
    READ1 (ivsc->qtype);
    READ1 (ivsc->rangestat);
    READ1 (ivsc->rangestat_arg);

    // dimensions
    READ1 (ivsc->d);
    READ1 (ivsc->code_size);

    // trained parameters
    READVECTOR (ivsc->trained);
}
|
337
|
+
|
338
|
+
|
339
|
+
/** Fill an existing HNSW structure from f.
 *
 * Reads the five vectors (assign_probas, cum_nneighbor_per_level, levels,
 * offsets, neighbors) followed by the scalar fields entry_point,
 * max_level, efConstruction, efSearch and upper_beam.
 */
static void read_HNSW (HNSW *hnsw, IOReader *f)
{
    // vector-valued members
    READVECTOR (hnsw->assign_probas);
    READVECTOR (hnsw->cum_nneighbor_per_level);
    READVECTOR (hnsw->levels);
    READVECTOR (hnsw->offsets);
    READVECTOR (hnsw->neighbors);

    // scalar members
    READ1 (hnsw->entry_point);
    READ1 (hnsw->max_level);
    READ1 (hnsw->efConstruction);
    READ1 (hnsw->efSearch);
    READ1 (hnsw->upper_beam);
}
|
352
|
+
|
353
|
+
/** Read a ProductQuantizer from the file named fname.
 *
 * @return newly allocated object, owned by the caller
 */
ProductQuantizer * read_ProductQuantizer (const char*fname)
{
    FileIOReader reader(fname);
    ProductQuantizer *loaded = read_ProductQuantizer(&reader);
    return loaded;
}
|
357
|
+
|
358
|
+
/** Allocate a ProductQuantizer and fill it from reader.
 *
 * The object is deleted if reading throws.
 *
 * @return newly allocated object, owned by the caller
 */
ProductQuantizer * read_ProductQuantizer (IOReader *reader)
{
    ProductQuantizer *result = new ProductQuantizer();
    // owns `result` until the read has completed
    ScopeDeleter1<ProductQuantizer> guard (result);

    read_ProductQuantizer(result, reader);

    guard.release ();
    return result;
}
|
366
|
+
|
367
|
+
/** Read the part of an IndexIVF that is common to all IVF variants.
 *
 * Reads the generic index header, nlist, nprobe and the coarse quantizer
 * (which ivf then owns), optionally the per-list id vectors, and finally
 * maintain_direct_map and direct_map.
 *
 * @param ivf  index to fill
 * @param f    reader to consume from
 * @param ids  when non-null, resized to nlist and filled with one id
 *             vector per list (legacy "Iv" formats)
 */
static void read_ivf_header (
    IndexIVF *ivf, IOReader *f,
    std::vector<std::vector<Index::idx_t> > *ids = nullptr)
{
    read_index_header (ivf, f);
    READ1 (ivf->nlist);
    READ1 (ivf->nprobe);

    ivf->quantizer = read_index (f);
    ivf->own_fields = true;

    if (ids != nullptr) { // used in legacy "Iv" formats
        std::vector<std::vector<Index::idx_t> > & per_list = *ids;
        per_list.resize (ivf->nlist);
        for (size_t list_no = 0; list_no < ivf->nlist; list_no++) {
            READVECTOR (per_list[list_no]);
        }
    }

    READ1 (ivf->maintain_direct_map);
    READVECTOR (ivf->direct_map);
}
|
384
|
+
|
385
|
+
// used for legacy formats
|
386
|
+
static ArrayInvertedLists *set_array_invlist(
|
387
|
+
IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
|
388
|
+
{
|
389
|
+
ArrayInvertedLists *ail = new ArrayInvertedLists (
|
390
|
+
ivf->nlist, ivf->code_size);
|
391
|
+
std::swap (ail->ids, ids);
|
392
|
+
ivf->invlists = ail;
|
393
|
+
ivf->own_invlists = true;
|
394
|
+
return ail;
|
395
|
+
}
|
396
|
+
|
397
|
+
/** Read an IndexIVFPQ or IndexIVFPQR whose fourcc h was already consumed.
 *
 * Supported tags: "IvPQ" / "IvQR" (legacy layout, ids stored in the IVF
 * header and codes stored as one vector per list) and "IwPQ" / "IwQR"
 * (inverted lists stored through read_InvertedLists). The "QR" tags yield
 * an IndexIVFPQR.
 *
 * For a trained index, use_precomputed_table is reset to 0 and the table
 * is recomputed when by_residual is set; the refinement fields of an
 * IndexIVFPQR are read only in that trained case.
 *
 * @return newly allocated index, owned by the caller. The index is
 *         deleted if any read throws.
 */
static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
{
    bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");

    IndexIVFPQR *ivfpqr =
        h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
        new IndexIVFPQR () : nullptr;
    IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
    // deletes the index if one of the reads below throws
    ScopeDeleter1<IndexIVFPQ> del (ivpq);

    std::vector<std::vector<Index::idx_t> > ids;
    read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
    READ1 (ivpq->by_residual);
    READ1 (ivpq->code_size);
    read_ProductQuantizer (&ivpq->pq, f);

    if (legacy) {
        ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
        for (size_t i = 0; i < ail->nlist; i++) {
            READVECTOR (ail->codes[i]);
        }
    } else {
        read_InvertedLists (ivpq, f, io_flags);
    }

    if (ivpq->is_trained) {
        // precomputed table not stored. It is cheaper to recompute it
        ivpq->use_precomputed_table = 0;
        if (ivpq->by_residual) {
            ivpq->precompute_table ();
        }
        if (ivfpqr) {
            read_ProductQuantizer (&ivfpqr->refine_pq, f);
            READVECTOR (ivfpqr->refine_codes);
            READ1 (ivfpqr->k_factor);
        }
    }
    del.release ();
    return ivpq;
}
|
433
|
+
|
434
|
+
// Global compatibility switch consulted by read_index: when it equals 2,
// the "IxPT" branch assumes a chain of exactly one transform instead of
// reading the chain length from the stream.
int read_old_fmt_hack = 0;
|
435
|
+
|
436
|
+
/** Read an index from f.
 *
 * The leading fourcc selects the concrete index class; the remaining
 * fields are then read in the layout of that class. Nested indexes
 * (quantizers, storage, wrapped indexes) are read recursively and become
 * owned by the index that contains them.
 *
 * @param f         reader to consume from
 * @param io_flags  bitwise OR of IO_FLAG_* values, forwarded to the
 *                  inverted-list and nested-index readers
 * @return          newly allocated index, owned by the caller
 *
 * Throws through the FAISS_THROW_* macros on an unknown fourcc or when
 * the stored data is inconsistent. NOTE: most branches do not free the
 * partially built index when a read throws.
 */
Index *read_index (IOReader *f, int io_flags) {
    Index * idx = nullptr;
    uint32_t h;
    READ1 (h);
    if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
        // flat index, inner product ("IxFI") or L2 ("IxF2")
        IndexFlat *idxf;
        if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
        else                      idxf = new IndexFlatL2 ();
        read_index_header (idxf, f);
        READVECTOR (idxf->xb);
        FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
        // leak! (idxf is not freed when the check above throws)
        idx = idxf;
    } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
        IndexLSH * idxl = new IndexLSH ();
        read_index_header (idxl, f);
        READ1 (idxl->nbits);
        READ1 (idxl->rotate_data);
        READ1 (idxl->train_thresholds);
        READVECTOR (idxl->thresholds);
        READ1 (idxl->bytes_per_vec);
        if (h == fourcc("IxHE")) {
            FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
                        "can only read old format IndexLSH with "
                        "nbits multiple of 64 (got %d)",
                        (int) idxl->nbits);
            // leak (idxl is not freed when the check above throws)
            idxl->bytes_per_vec *= 8;
        }
        {
            RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
                (read_VectorTransform (f));
            FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
            idxl->rrot = *rrot;
            delete rrot;
        }
        READVECTOR (idxl->codes);
        FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
                         idxl->rrot.d_out == idxl->nbits);
        FAISS_THROW_IF_NOT (
               idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
        idx = idxl;
    } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
               h == fourcc ("IxPq")) {
        // IxPQ and IxPo were merged into the same IndexPQ object
        IndexPQ * idxp =new IndexPQ ();
        read_index_header (idxp, f);
        read_ProductQuantizer (&idxp->pq, f);
        READVECTOR (idxp->codes);
        if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
            READ1 (idxp->search_type);
            READ1 (idxp->encode_signs);
            READ1 (idxp->polysemous_ht);
        }
        // Old versions of PQ all had metric_type set to INNER_PRODUCT
        // when they were in fact using L2. Therefore, we force metric type
        // to L2 when the old format is detected
        if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
            idxp->metric_type = METRIC_L2;
        }
        idx = idxp;
    } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
        IndexIVFFlat * ivfl = new IndexIVFFlat ();
        std::vector<std::vector<Index::idx_t> > ids;
        read_ivf_header (ivfl, f, &ids);
        ivfl->code_size = ivfl->d * sizeof(float);
        ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);

        if (h == fourcc ("IvFL")) {
            for (size_t i = 0; i < ivfl->nlist; i++) {
                READVECTOR (ail->codes[i]);
            }
        } else { // old format
            // vectors were stored as floats: copy them over as raw bytes
            for (size_t i = 0; i < ivfl->nlist; i++) {
                std::vector<float> vec;
                READVECTOR (vec);
                ail->codes[i].resize(vec.size() * sizeof(float));
                memcpy(ail->codes[i].data(), vec.data(),
                       ail->codes[i].size());
            }
        }
        idx = ivfl;
    } else if (h == fourcc ("IwFd")) {
        IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
        read_ivf_header (ivfl, f);
        ivfl->code_size = ivfl->d * sizeof(float);
        {
            std::vector<Index::idx_t> tab;
            READVECTOR (tab);
            // flat list of pairs: with an odd length the last tab[i + 1]
            // below would read past the end of the vector
            FAISS_THROW_IF_NOT (tab.size() % 2 == 0);
            for (size_t i = 0; i < tab.size(); i += 2) {
                std::pair<Index::idx_t, Index::idx_t>
                    pair (tab[i], tab[i + 1]);
                ivfl->instances.insert (pair);
            }
        }
        read_InvertedLists (ivfl, f, io_flags);
        idx = ivfl;
    } else if (h == fourcc ("IwFl")) {
        IndexIVFFlat * ivfl = new IndexIVFFlat ();
        read_ivf_header (ivfl, f);
        ivfl->code_size = ivfl->d * sizeof(float);
        read_InvertedLists (ivfl, f, io_flags);
        idx = ivfl;
    } else if (h == fourcc ("IxSQ")) {
        IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
        read_index_header (idxs, f);
        read_ScalarQuantizer (&idxs->sq, f);
        READVECTOR (idxs->codes);
        idxs->code_size = idxs->sq.code_size;
        idx = idxs;
    } else if (h == fourcc ("IxLa")) {
        // constructor arguments are stored ahead of the generic header
        int d, nsq, scale_nbit, r2;
        READ1 (d);
        READ1 (nsq);
        READ1 (scale_nbit);
        READ1 (r2);
        IndexLattice *idxl = new IndexLattice (d, nsq, scale_nbit, r2);
        read_index_header (idxl, f);
        READVECTOR (idxl->trained);
        idx = idxl;
    } else if(h == fourcc ("IvSQ")) { // legacy
        IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
        std::vector<std::vector<Index::idx_t> > ids;
        read_ivf_header (ivsc, f, &ids);
        read_ScalarQuantizer (&ivsc->sq, f);
        READ1 (ivsc->code_size);
        ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
        for (size_t i = 0; i < ivsc->nlist; i++) {
            READVECTOR (ail->codes[i]);
        }
        idx = ivsc;
    } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
        IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
        read_ivf_header (ivsc, f);
        read_ScalarQuantizer (&ivsc->sq, f);
        READ1 (ivsc->code_size);
        if (h == fourcc ("IwSQ")) {
            // by_residual is not stored under this tag
            ivsc->by_residual = true;
        } else {
            READ1 (ivsc->by_residual);
        }
        read_InvertedLists (ivsc, f, io_flags);
        idx = ivsc;
    } else if(h == fourcc ("IwSh")) {
        IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
        read_ivf_header (ivsp, f);
        ivsp->vt = read_VectorTransform (f);
        ivsp->own_fields = true;
        READ1 (ivsp->nbit);
        // not stored by write_ivf_header
        ivsp->code_size = (ivsp->nbit + 7) / 8;
        READ1 (ivsp->period);
        READ1 (ivsp->threshold_type);
        READVECTOR (ivsp->trained);
        read_InvertedLists (ivsp, f, io_flags);
        idx = ivsp;
    } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
              h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {

        idx = read_ivfpq (f, h, io_flags);

    } else if(h == fourcc ("IxPT")) {
        IndexPreTransform * ixpt = new IndexPreTransform();
        ixpt->own_fields = true;
        read_index_header (ixpt, f);
        int nt;
        if (read_old_fmt_hack == 2) {
            // chain length is not read in this mode; assume one transform
            nt = 1;
        } else {
            READ1 (nt);
        }
        for (int i = 0; i < nt; i++) {
            ixpt->chain.push_back (read_VectorTransform (f));
        }
        ixpt->index = read_index (f, io_flags);
        idx = ixpt;
    } else if(h == fourcc ("Imiq")) {
        MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
        read_index_header (imiq, f);
        read_ProductQuantizer (&imiq->pq, f);
        idx = imiq;
    } else if(h == fourcc ("IxRF")) {
        IndexRefineFlat *idxrf = new IndexRefineFlat ();
        read_index_header (idxrf, f);
        idxrf->base_index = read_index(f, io_flags);
        idxrf->own_fields = true;
        // the refine index is embedded by value, so it has to be a flat
        // index; check the cast before dereferencing its result
        Index *refine = read_index (f, io_flags);
        IndexFlat *rf = dynamic_cast<IndexFlat*> (refine);
        if (!rf) {
            delete refine;
        }
        FAISS_THROW_IF_NOT_MSG (rf, "expected an IndexFlat as refine index");
        std::swap (*rf, idxrf->refine_index);
        delete rf;
        READ1 (idxrf->k_factor);
        idx = idxrf;
    } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
        bool is_map2 = h == fourcc ("IxM2");
        IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
        read_index_header (idxmap, f);
        idxmap->index = read_index (f, io_flags);
        idxmap->own_fields = true;
        READVECTOR (idxmap->id_map);
        if (is_map2) {
            static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
        }
        idx = idxmap;
    } else if (h == fourcc ("Ix2L")) {
        Index2Layer * idxp = new Index2Layer ();
        read_index_header (idxp, f);
        idxp->q1.quantizer = read_index (f, io_flags);
        READ1 (idxp->q1.nlist);
        READ1 (idxp->q1.quantizer_trains_alone);
        read_ProductQuantizer (&idxp->pq, f);
        READ1 (idxp->code_size_1);
        READ1 (idxp->code_size_2);
        READ1 (idxp->code_size);
        READVECTOR (idxp->codes);
        idx = idxp;
    } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
              h == fourcc("IHNs") || h == fourcc("IHN2")) {
        IndexHNSW *idxhnsw = nullptr;
        if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
        if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
        if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
        if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
        read_index_header (idxhnsw, f);
        read_HNSW (&idxhnsw->hnsw, f);
        idxhnsw->storage = read_index (f, io_flags);
        idxhnsw->own_fields = true;
        if (h == fourcc("IHNp")) {
            // the storage has to be an IndexPQ; check the cast before
            // dereferencing its result
            IndexPQ *storage_pq = dynamic_cast<IndexPQ*>(idxhnsw->storage);
            FAISS_THROW_IF_NOT_MSG (
                storage_pq, "expected an IndexPQ as storage of IndexHNSWPQ");
            storage_pq->pq.compute_sdc_table ();
        }
        idx = idxhnsw;
    } else {
        FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
    }
    return idx;
}
|
670
|
+
|
671
|
+
|
672
|
+
/** Read an index from an already opened FILE.
 *
 * @return newly allocated index, owned by the caller
 */
Index *read_index (FILE * f, int io_flags)
{
    FileIOReader reader(f);
    Index *loaded = read_index(&reader, io_flags);
    return loaded;
}
|
676
|
+
|
677
|
+
/** Read an index from the file named fname.
 *
 * @return newly allocated index, owned by the caller
 */
Index *read_index (const char *fname, int io_flags)
{
    FileIOReader reader(fname);
    return read_index (&reader, io_flags);
}
|
682
|
+
|
683
|
+
/** Read a VectorTransform from the file named fname.
 *
 * @return the object produced by the IOReader overload
 */
VectorTransform *read_VectorTransform (const char *fname)
{
    FileIOReader reader(fname);
    return read_VectorTransform (&reader);
}
|
688
|
+
|
689
|
+
|
690
|
+
|
691
|
+
/*************************************************************
|
692
|
+
* Read binary indexes
|
693
|
+
**************************************************************/
|
694
|
+
|
695
|
+
/** Read inverted lists from f and attach them to a binary IVF index.
 *
 * When lists are present, their nlist and code_size must match the ones
 * of ivf; otherwise an exception is thrown and the lists just read are
 * deleted. ivf takes ownership (own_invlists is set), also when the
 * result is nullptr.
 */
static void read_InvertedLists (
        IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
    InvertedLists *ils = read_InvertedLists (f, io_flags);
    // deletes ils if the consistency check below throws
    ScopeDeleter1<InvertedLists> del (ils);
    FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
                                 ils->code_size == ivf->code_size));
    del.release ();
    ivf->invlists = ils;
    ivf->own_invlists = true;
}
|
703
|
+
|
704
|
+
|
705
|
+
|
706
|
+
/** Read the fields shared by every IndexBinary.
 *
 * Reads d, code_size, ntotal, is_trained and metric_type in that order,
 * then sets verbose to false.
 */
static void read_index_binary_header (IndexBinary *idx, IOReader *f)
{
    // geometry
    READ1 (idx->d);
    READ1 (idx->code_size);

    // state
    READ1 (idx->ntotal);
    READ1 (idx->is_trained);
    READ1 (idx->metric_type);

    // not read from the stream
    idx->verbose = false;
}
|
714
|
+
|
715
|
+
/** Read the part of an IndexBinaryIVF that precedes the inverted lists.
 *
 * Reads the generic binary index header, nlist, nprobe and the coarse
 * quantizer (which ivf then owns), optionally the per-list id vectors,
 * and finally maintain_direct_map and direct_map.
 *
 * @param ivf  index to fill
 * @param f    reader to consume from
 * @param ids  when non-null, resized to nlist and filled with one id
 *             vector per list (legacy "Iv" formats)
 */
static void read_binary_ivf_header (
    IndexBinaryIVF *ivf, IOReader *f,
    std::vector<std::vector<Index::idx_t> > *ids = nullptr)
{
    read_index_binary_header (ivf, f);
    READ1 (ivf->nlist);
    READ1 (ivf->nprobe);

    ivf->quantizer = read_index_binary (f);
    ivf->own_fields = true;

    if (ids != nullptr) { // used in legacy "Iv" formats
        std::vector<std::vector<Index::idx_t> > & per_list = *ids;
        per_list.resize (ivf->nlist);
        for (size_t list_no = 0; list_no < ivf->nlist; list_no++) {
            READVECTOR (per_list[list_no]);
        }
    }

    READ1 (ivf->maintain_direct_map);
    READVECTOR (ivf->direct_map);
}
|
732
|
+
|
733
|
+
/** Read a binary index from f.
 *
 * The leading fourcc selects the concrete class: "IBxF" (flat), "IBwF"
 * (IVF), "IBFf" (wrapper around a float index), "IBHf" (HNSW) and
 * "IBMp" / "IBM2" (id maps). Nested indexes are read recursively and
 * become owned by the index that contains them.
 *
 * @param f         reader to consume from
 * @param io_flags  bitwise OR of IO_FLAG_* values, forwarded to the
 *                  inverted-list and nested-index readers
 * @return          newly allocated index, owned by the caller
 *
 * Throws through the FAISS_THROW_* macros on an unknown fourcc or on
 * inconsistent data.
 */
IndexBinary *read_index_binary (IOReader *f, int io_flags)
{
    uint32_t h;
    READ1 (h);

    if (h == fourcc ("IBxF")) {
        IndexBinaryFlat *idxf = new IndexBinaryFlat ();
        read_index_binary_header (idxf, f);
        READVECTOR (idxf->xb);
        FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
        // leak! (idxf is not freed when the check above throws)
        return idxf;
    }

    if (h == fourcc ("IBwF")) {
        IndexBinaryIVF *binary_ivf = new IndexBinaryIVF ();
        read_binary_ivf_header (binary_ivf, f);
        read_InvertedLists (binary_ivf, f, io_flags);
        return binary_ivf;
    }

    if (h == fourcc ("IBFf")) {
        IndexBinaryFromFloat *from_float = new IndexBinaryFromFloat ();
        read_index_binary_header (from_float, f);
        from_float->own_fields = true;
        from_float->index = read_index (f, io_flags);
        return from_float;
    }

    if (h == fourcc ("IBHf")) {
        IndexBinaryHNSW *binary_hnsw = new IndexBinaryHNSW ();
        read_index_binary_header (binary_hnsw, f);
        read_HNSW (&binary_hnsw->hnsw, f);
        binary_hnsw->storage = read_index_binary (f, io_flags);
        binary_hnsw->own_fields = true;
        return binary_hnsw;
    }

    if (h == fourcc ("IBMp") || h == fourcc ("IBM2")) {
        const bool with_reverse_map = h == fourcc ("IBM2");
        IndexBinaryIDMap *id_map_index;
        if (with_reverse_map) {
            id_map_index = new IndexBinaryIDMap2 ();
        } else {
            id_map_index = new IndexBinaryIDMap ();
        }
        read_index_binary_header (id_map_index, f);
        id_map_index->index = read_index_binary (f, io_flags);
        id_map_index->own_fields = true;
        READVECTOR (id_map_index->id_map);
        if (with_reverse_map) {
            static_cast<IndexBinaryIDMap2*>(id_map_index)
                ->construct_rev_map ();
        }
        return id_map_index;
    }

    FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
    return nullptr;
}
|
780
|
+
|
781
|
+
/** Read a binary index from an already opened FILE.
 *
 * @return newly allocated index, owned by the caller
 */
IndexBinary *read_index_binary (FILE * f, int io_flags)
{
    FileIOReader reader(f);
    IndexBinary *loaded = read_index_binary(&reader, io_flags);
    return loaded;
}
|
785
|
+
|
786
|
+
/** Read a binary index from the file named fname.
 *
 * @return newly allocated index, owned by the caller
 */
IndexBinary *read_index_binary (const char *fname, int io_flags)
{
    FileIOReader reader(fname);
    return read_index_binary (&reader, io_flags);
}
|
791
|
+
|
792
|
+
|
793
|
+
} // namespace faiss
|