faiss 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +334 -195
- data/vendor/faiss/faiss/Clustering.h +88 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +139 -164
- data/vendor/faiss/faiss/Index2Layer.h +22 -22
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +116 -147
- data/vendor/faiss/faiss/IndexFlat.h +35 -46
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +474 -454
- data/vendor/faiss/faiss/IndexIVF.h +146 -113
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +248 -250
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +457 -516
- data/vendor/faiss/faiss/IndexIVFPQ.h +74 -66
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +125 -133
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +19 -21
- data/vendor/faiss/faiss/IndexLSH.cpp +75 -96
- data/vendor/faiss/faiss/IndexLSH.h +21 -26
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +231 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +303 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +405 -464
- data/vendor/faiss/faiss/IndexPQ.h +64 -67
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +115 -131
- data/vendor/faiss/faiss/IndexRefine.h +22 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexResidual.cpp +291 -0
- data/vendor/faiss/faiss/IndexResidual.h +152 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +120 -155
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -45
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +652 -474
- data/vendor/faiss/faiss/VectorTransform.h +61 -89
- data/vendor/faiss/faiss/clone_index.cpp +77 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +197 -170
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +270 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +115 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +595 -611
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +672 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +172 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +682 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +448 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +130 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +648 -701
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +547 -479
- data/vendor/faiss/faiss/impl/index_write.cpp +497 -407
- data/vendor/faiss/faiss/impl/io.cpp +75 -94
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +40 -29
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +269 -218
- data/vendor/faiss/faiss/index_factory.h +6 -7
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +301 -310
- data/vendor/faiss/faiss/utils/distances.h +133 -118
- data/vendor/faiss/faiss/utils/distances_simd.cpp +456 -516
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +53 -48
- metadata +20 -2
|
@@ -11,42 +11,46 @@
|
|
|
11
11
|
* implementation of Hyper-parameter auto-tuning
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
|
+
#include <faiss/index_factory.h>
|
|
15
|
+
|
|
14
16
|
#include <faiss/AutoTune.h>
|
|
15
17
|
|
|
16
18
|
#include <cinttypes>
|
|
17
19
|
#include <cmath>
|
|
18
20
|
|
|
21
|
+
#include <regex>
|
|
22
|
+
|
|
19
23
|
#include <faiss/impl/FaissAssert.h>
|
|
20
|
-
#include <faiss/utils/utils.h>
|
|
21
24
|
#include <faiss/utils/random.h>
|
|
25
|
+
#include <faiss/utils/utils.h>
|
|
22
26
|
|
|
27
|
+
#include <faiss/Index2Layer.h>
|
|
23
28
|
#include <faiss/IndexFlat.h>
|
|
24
|
-
#include <faiss/
|
|
25
|
-
#include <faiss/IndexPreTransform.h>
|
|
26
|
-
#include <faiss/IndexLSH.h>
|
|
27
|
-
#include <faiss/IndexPQ.h>
|
|
29
|
+
#include <faiss/IndexHNSW.h>
|
|
28
30
|
#include <faiss/IndexIVF.h>
|
|
31
|
+
#include <faiss/IndexIVFFlat.h>
|
|
29
32
|
#include <faiss/IndexIVFPQ.h>
|
|
33
|
+
#include <faiss/IndexIVFPQFastScan.h>
|
|
30
34
|
#include <faiss/IndexIVFPQR.h>
|
|
31
|
-
#include <faiss/
|
|
32
|
-
#include <faiss/IndexIVFFlat.h>
|
|
33
|
-
#include <faiss/MetaIndexes.h>
|
|
34
|
-
#include <faiss/IndexScalarQuantizer.h>
|
|
35
|
-
#include <faiss/IndexHNSW.h>
|
|
35
|
+
#include <faiss/IndexLSH.h>
|
|
36
36
|
#include <faiss/IndexLattice.h>
|
|
37
|
+
#include <faiss/IndexNSG.h>
|
|
38
|
+
#include <faiss/IndexPQ.h>
|
|
37
39
|
#include <faiss/IndexPQFastScan.h>
|
|
38
|
-
#include <faiss/
|
|
40
|
+
#include <faiss/IndexPreTransform.h>
|
|
39
41
|
#include <faiss/IndexRefine.h>
|
|
40
|
-
|
|
42
|
+
#include <faiss/IndexResidual.h>
|
|
43
|
+
#include <faiss/IndexScalarQuantizer.h>
|
|
44
|
+
#include <faiss/MetaIndexes.h>
|
|
45
|
+
#include <faiss/VectorTransform.h>
|
|
41
46
|
|
|
42
47
|
#include <faiss/IndexBinaryFlat.h>
|
|
43
48
|
#include <faiss/IndexBinaryHNSW.h>
|
|
44
|
-
#include <faiss/IndexBinaryIVF.h>
|
|
45
49
|
#include <faiss/IndexBinaryHash.h>
|
|
50
|
+
#include <faiss/IndexBinaryIVF.h>
|
|
46
51
|
|
|
47
52
|
namespace faiss {
|
|
48
53
|
|
|
49
|
-
|
|
50
54
|
/***************************************************************
|
|
51
55
|
* index_factory
|
|
52
56
|
***************************************************************/
|
|
@@ -54,42 +58,48 @@ namespace faiss {
|
|
|
54
58
|
namespace {
|
|
55
59
|
|
|
56
60
|
struct VTChain {
|
|
57
|
-
std::vector<VectorTransform
|
|
58
|
-
~VTChain
|
|
61
|
+
std::vector<VectorTransform*> chain;
|
|
62
|
+
~VTChain() {
|
|
59
63
|
for (int i = 0; i < chain.size(); i++) {
|
|
60
64
|
delete chain[i];
|
|
61
65
|
}
|
|
62
66
|
}
|
|
63
67
|
};
|
|
64
68
|
|
|
65
|
-
|
|
66
69
|
/// what kind of training does this coarse quantizer require?
|
|
67
|
-
char get_trains_alone(const Index
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
70
|
+
char get_trains_alone(const Index* coarse_quantizer) {
|
|
71
|
+
if (dynamic_cast<const IndexFlat*>(coarse_quantizer)) {
|
|
72
|
+
return 0;
|
|
73
|
+
}
|
|
74
|
+
// multi index just needs to be quantized
|
|
75
|
+
if (dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ||
|
|
76
|
+
dynamic_cast<const ResidualCoarseQuantizer*>(coarse_quantizer)) {
|
|
77
|
+
return 1;
|
|
78
|
+
}
|
|
79
|
+
if (dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer)) {
|
|
80
|
+
return 2;
|
|
81
|
+
}
|
|
82
|
+
return 2; // for complicated indexes, we assume they can't be used as a
|
|
83
|
+
// kmeans index
|
|
74
84
|
}
|
|
75
85
|
|
|
76
|
-
bool str_ends_with(const std::string& s, const std::string& suffix)
|
|
77
|
-
|
|
78
|
-
return s.rfind(suffix) == std::abs(int(s.size()-suffix.size()));
|
|
86
|
+
bool str_ends_with(const std::string& s, const std::string& suffix) {
|
|
87
|
+
return s.rfind(suffix) == std::abs(int(s.size() - suffix.size()));
|
|
79
88
|
}
|
|
80
89
|
|
|
81
90
|
// check if ends with suffix followed by digits
|
|
82
|
-
bool str_ends_with_digits(const std::string& s, const std::string& suffix)
|
|
83
|
-
{
|
|
91
|
+
bool str_ends_with_digits(const std::string& s, const std::string& suffix) {
|
|
84
92
|
int i;
|
|
85
|
-
for(i = s.length() - 1; i >= 0; i--) {
|
|
86
|
-
if (!isdigit(s[i]))
|
|
93
|
+
for (i = s.length() - 1; i >= 0; i--) {
|
|
94
|
+
if (!isdigit(s[i]))
|
|
95
|
+
break;
|
|
87
96
|
}
|
|
88
97
|
return str_ends_with(s.substr(0, i + 1), suffix);
|
|
89
98
|
}
|
|
90
99
|
|
|
91
|
-
void find_matching_parentheses(const std::string
|
|
100
|
+
void find_matching_parentheses(const std::string& s, int& i0, int& i1) {
|
|
92
101
|
int st = 0;
|
|
102
|
+
i0 = i1 = 0;
|
|
93
103
|
for (int i = 0; i < s.length(); i++) {
|
|
94
104
|
if (s[i] == '(') {
|
|
95
105
|
if (st == 0) {
|
|
@@ -105,32 +115,30 @@ void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
|
|
|
105
115
|
return;
|
|
106
116
|
}
|
|
107
117
|
if (st < 0) {
|
|
108
|
-
FAISS_THROW_FMT(
|
|
118
|
+
FAISS_THROW_FMT(
|
|
119
|
+
"factory string %s: unbalanced parentheses", s.c_str());
|
|
109
120
|
}
|
|
110
121
|
}
|
|
111
|
-
|
|
112
122
|
}
|
|
113
|
-
FAISS_THROW_FMT(
|
|
114
|
-
|
|
123
|
+
FAISS_THROW_FMT(
|
|
124
|
+
"factory string %s: unbalanced parentheses st=%d", s.c_str(), st);
|
|
115
125
|
}
|
|
116
126
|
|
|
117
127
|
} // anonymous namespace
|
|
118
128
|
|
|
119
|
-
Index
|
|
120
|
-
|
|
121
|
-
FAISS_THROW_IF_NOT(metric == METRIC_L2 ||
|
|
122
|
-
metric == METRIC_INNER_PRODUCT);
|
|
129
|
+
Index* index_factory(int d, const char* description_in, MetricType metric) {
|
|
130
|
+
FAISS_THROW_IF_NOT(metric == METRIC_L2 || metric == METRIC_INNER_PRODUCT);
|
|
123
131
|
VTChain vts;
|
|
124
|
-
Index
|
|
132
|
+
Index* coarse_quantizer = nullptr;
|
|
125
133
|
std::string parenthesis_ivf, parenthesis_refine;
|
|
126
|
-
Index
|
|
134
|
+
Index* index = nullptr;
|
|
127
135
|
bool add_idmap = false;
|
|
128
136
|
int d_in = d;
|
|
129
137
|
|
|
130
138
|
ScopeDeleter1<Index> del_coarse_quantizer, del_index;
|
|
131
139
|
|
|
132
140
|
std::string description(description_in);
|
|
133
|
-
char
|
|
141
|
+
char* ptr;
|
|
134
142
|
|
|
135
143
|
// handle indexes in parentheses
|
|
136
144
|
while (description.find('(') != std::string::npos) {
|
|
@@ -153,11 +161,11 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
|
153
161
|
int64_t ncentroids = -1;
|
|
154
162
|
bool use_2layer = false;
|
|
155
163
|
int hnsw_M = -1;
|
|
164
|
+
int nsg_R = -1;
|
|
156
165
|
|
|
157
|
-
for (char
|
|
158
|
-
tok
|
|
159
|
-
|
|
160
|
-
int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
|
|
166
|
+
for (char* tok = strtok_r(&description[0], " ,", &ptr); tok;
|
|
167
|
+
tok = strtok_r(nullptr, " ,", &ptr)) {
|
|
168
|
+
int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2, R;
|
|
161
169
|
std::string stok(tok);
|
|
162
170
|
nbit = 8;
|
|
163
171
|
int bbs = -1;
|
|
@@ -166,234 +174,276 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
|
166
174
|
// to avoid mem leaks with exceptions:
|
|
167
175
|
// do all tests before any instanciation
|
|
168
176
|
|
|
169
|
-
VectorTransform
|
|
170
|
-
Index
|
|
171
|
-
Index
|
|
177
|
+
VectorTransform* vt_1 = nullptr;
|
|
178
|
+
Index* coarse_quantizer_1 = nullptr;
|
|
179
|
+
Index* index_1 = nullptr;
|
|
172
180
|
|
|
173
181
|
// VectorTransforms
|
|
174
|
-
if (sscanf
|
|
175
|
-
vt_1 = new PCAMatrix
|
|
182
|
+
if (sscanf(tok, "PCA%d", &d_out) == 1) {
|
|
183
|
+
vt_1 = new PCAMatrix(d, d_out);
|
|
176
184
|
d = d_out;
|
|
177
|
-
} else if (sscanf
|
|
178
|
-
vt_1 = new PCAMatrix
|
|
185
|
+
} else if (sscanf(tok, "PCAR%d", &d_out) == 1) {
|
|
186
|
+
vt_1 = new PCAMatrix(d, d_out, 0, true);
|
|
179
187
|
d = d_out;
|
|
180
|
-
} else if (sscanf
|
|
181
|
-
vt_1 = new RandomRotationMatrix
|
|
188
|
+
} else if (sscanf(tok, "RR%d", &d_out) == 1) {
|
|
189
|
+
vt_1 = new RandomRotationMatrix(d, d_out);
|
|
182
190
|
d = d_out;
|
|
183
|
-
} else if (sscanf
|
|
184
|
-
vt_1 = new PCAMatrix
|
|
191
|
+
} else if (sscanf(tok, "PCAW%d", &d_out) == 1) {
|
|
192
|
+
vt_1 = new PCAMatrix(d, d_out, -0.5, false);
|
|
185
193
|
d = d_out;
|
|
186
|
-
} else if (sscanf
|
|
187
|
-
vt_1 = new PCAMatrix
|
|
194
|
+
} else if (sscanf(tok, "PCAWR%d", &d_out) == 1) {
|
|
195
|
+
vt_1 = new PCAMatrix(d, d_out, -0.5, true);
|
|
188
196
|
d = d_out;
|
|
189
|
-
} else if (sscanf
|
|
190
|
-
vt_1 = new OPQMatrix
|
|
197
|
+
} else if (sscanf(tok, "OPQ%d_%d", &opq_M, &d_out) == 2) {
|
|
198
|
+
vt_1 = new OPQMatrix(d, opq_M, d_out);
|
|
191
199
|
d = d_out;
|
|
192
|
-
} else if (sscanf
|
|
193
|
-
vt_1 = new OPQMatrix
|
|
194
|
-
} else if (sscanf
|
|
195
|
-
vt_1 = new ITQTransform
|
|
200
|
+
} else if (sscanf(tok, "OPQ%d", &opq_M) == 1) {
|
|
201
|
+
vt_1 = new OPQMatrix(d, opq_M);
|
|
202
|
+
} else if (sscanf(tok, "ITQ%d", &d_out) == 1) {
|
|
203
|
+
vt_1 = new ITQTransform(d, d_out, true);
|
|
196
204
|
d = d_out;
|
|
197
205
|
} else if (stok == "ITQ") {
|
|
198
|
-
vt_1 = new ITQTransform
|
|
199
|
-
} else if (sscanf
|
|
206
|
+
vt_1 = new ITQTransform(d, d, false);
|
|
207
|
+
} else if (sscanf(tok, "Pad%d", &d_out) == 1) {
|
|
200
208
|
if (d_out > d) {
|
|
201
|
-
vt_1 = new RemapDimensionsTransform
|
|
209
|
+
vt_1 = new RemapDimensionsTransform(d, d_out, false);
|
|
202
210
|
d = d_out;
|
|
203
211
|
}
|
|
204
212
|
} else if (stok == "L2norm") {
|
|
205
|
-
vt_1 = new NormalizationTransform
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
} else if (
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
213
|
+
vt_1 = new NormalizationTransform(d, 2.0);
|
|
214
|
+
|
|
215
|
+
// coarse quantizers
|
|
216
|
+
} else if (
|
|
217
|
+
!coarse_quantizer &&
|
|
218
|
+
sscanf(tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
|
|
219
|
+
coarse_quantizer_1 = new IndexHNSWFlat(d, M, metric);
|
|
220
|
+
|
|
221
|
+
} else if (
|
|
222
|
+
!coarse_quantizer &&
|
|
223
|
+
sscanf(tok, "IVF%" PRId64 "_NSG%d", &ncentroids, &R) == 2) {
|
|
224
|
+
coarse_quantizer_1 = new IndexNSGFlat(d, R, metric);
|
|
225
|
+
|
|
226
|
+
} else if (
|
|
227
|
+
!coarse_quantizer &&
|
|
228
|
+
sscanf(tok, "IVF%" PRId64, &ncentroids) == 1) {
|
|
214
229
|
if (!parenthesis_ivf.empty()) {
|
|
215
230
|
coarse_quantizer_1 =
|
|
216
|
-
|
|
217
|
-
|
|
231
|
+
index_factory(d, parenthesis_ivf.c_str(), metric);
|
|
218
232
|
} else if (metric == METRIC_L2) {
|
|
219
|
-
coarse_quantizer_1 = new IndexFlatL2
|
|
233
|
+
coarse_quantizer_1 = new IndexFlatL2(d);
|
|
220
234
|
} else {
|
|
221
|
-
coarse_quantizer_1 = new IndexFlatIP
|
|
235
|
+
coarse_quantizer_1 = new IndexFlatIP(d);
|
|
222
236
|
}
|
|
223
237
|
|
|
224
|
-
} else if (!coarse_quantizer && sscanf
|
|
225
|
-
FAISS_THROW_IF_NOT_MSG
|
|
226
|
-
|
|
227
|
-
|
|
238
|
+
} else if (!coarse_quantizer && sscanf(tok, "IMI2x%d", &nbit) == 1) {
|
|
239
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
240
|
+
metric == METRIC_L2,
|
|
241
|
+
"MultiIndex not implemented for inner prod search");
|
|
242
|
+
coarse_quantizer_1 = new MultiIndexQuantizer(d, 2, nbit);
|
|
228
243
|
ncentroids = 1 << (2 * nbit);
|
|
229
244
|
|
|
230
|
-
} else if (
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
245
|
+
} else if (
|
|
246
|
+
!coarse_quantizer &&
|
|
247
|
+
sscanf(tok, "Residual%dx%d", &M, &nbit) == 2) {
|
|
248
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
249
|
+
metric == METRIC_L2,
|
|
250
|
+
"MultiIndex not implemented for inner prod search");
|
|
251
|
+
coarse_quantizer_1 = new MultiIndexQuantizer(d, M, nbit);
|
|
235
252
|
ncentroids = int64_t(1) << (M * nbit);
|
|
236
253
|
use_2layer = true;
|
|
237
254
|
|
|
238
|
-
} else if (
|
|
239
|
-
|
|
240
|
-
|
|
255
|
+
} else if (std::regex_match(
|
|
256
|
+
stok,
|
|
257
|
+
std::regex(
|
|
258
|
+
"(RQ|RCQ)[0-9]+x[0-9]+(_[0-9]+x[0-9]+)*"))) {
|
|
259
|
+
std::vector<size_t> nbits;
|
|
260
|
+
std::smatch sm;
|
|
261
|
+
bool is_RCQ = stok.find("RCQ") == 0;
|
|
262
|
+
while (std::regex_search(
|
|
263
|
+
stok, sm, std::regex("([0-9]+)x([0-9]+)"))) {
|
|
264
|
+
int M = std::stoi(sm[1].str());
|
|
265
|
+
int nbit = std::stoi(sm[2].str());
|
|
266
|
+
nbits.resize(nbits.size() + M, nbit);
|
|
267
|
+
stok = sm.suffix();
|
|
268
|
+
}
|
|
269
|
+
if (!is_RCQ) {
|
|
270
|
+
index_1 = new IndexResidual(d, nbits, metric);
|
|
271
|
+
} else {
|
|
272
|
+
index_1 = new ResidualCoarseQuantizer(d, nbits, metric);
|
|
273
|
+
}
|
|
274
|
+
} else if (
|
|
275
|
+
!coarse_quantizer &&
|
|
276
|
+
sscanf(tok, "Residual%" PRId64, &ncentroids) == 1) {
|
|
277
|
+
coarse_quantizer_1 = new IndexFlatL2(d);
|
|
241
278
|
use_2layer = true;
|
|
242
279
|
|
|
243
280
|
} else if (stok == "IDMap") {
|
|
244
281
|
add_idmap = true;
|
|
245
282
|
|
|
246
|
-
|
|
283
|
+
// IVFs
|
|
247
284
|
} else if (!index && (stok == "Flat" || stok == "FlatDedup")) {
|
|
248
285
|
if (coarse_quantizer) {
|
|
249
286
|
// if there was an IVF in front, then it is an IVFFlat
|
|
250
|
-
IndexIVF
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
287
|
+
IndexIVF* index_ivf = stok == "Flat"
|
|
288
|
+
? new IndexIVFFlat(
|
|
289
|
+
coarse_quantizer, d, ncentroids, metric)
|
|
290
|
+
: new IndexIVFFlatDedup(
|
|
291
|
+
coarse_quantizer, d, ncentroids, metric);
|
|
255
292
|
index_ivf->quantizer_trains_alone =
|
|
256
|
-
|
|
293
|
+
get_trains_alone(coarse_quantizer);
|
|
257
294
|
index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
|
|
258
|
-
del_coarse_quantizer.release
|
|
295
|
+
del_coarse_quantizer.release();
|
|
259
296
|
index_ivf->own_fields = true;
|
|
260
297
|
index_1 = index_ivf;
|
|
261
298
|
} else if (hnsw_M > 0) {
|
|
262
|
-
index_1 = new IndexHNSWFlat
|
|
299
|
+
index_1 = new IndexHNSWFlat(d, hnsw_M, metric);
|
|
300
|
+
} else if (nsg_R > 0) {
|
|
301
|
+
index_1 = new IndexNSGFlat(d, nsg_R, metric);
|
|
263
302
|
} else {
|
|
264
|
-
FAISS_THROW_IF_NOT_MSG
|
|
265
|
-
|
|
266
|
-
|
|
303
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
304
|
+
stok != "FlatDedup",
|
|
305
|
+
"dedup supported only for IVFFlat");
|
|
306
|
+
index_1 = new IndexFlat(d, metric);
|
|
267
307
|
}
|
|
268
|
-
} else if (
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
308
|
+
} else if (
|
|
309
|
+
!index &&
|
|
310
|
+
(stok == "SQ8" || stok == "SQ4" || stok == "SQ6" ||
|
|
311
|
+
stok == "SQfp16")) {
|
|
312
|
+
ScalarQuantizer::QuantizerType qt = stok == "SQ8"
|
|
313
|
+
? ScalarQuantizer::QT_8bit
|
|
314
|
+
: stok == "SQ6" ? ScalarQuantizer::QT_6bit
|
|
315
|
+
: stok == "SQ4" ? ScalarQuantizer::QT_4bit
|
|
316
|
+
: stok == "SQfp16" ? ScalarQuantizer::QT_fp16
|
|
317
|
+
: ScalarQuantizer::QT_4bit;
|
|
276
318
|
if (coarse_quantizer) {
|
|
277
|
-
FAISS_THROW_IF_NOT
|
|
278
|
-
IndexIVFScalarQuantizer
|
|
279
|
-
|
|
280
|
-
|
|
319
|
+
FAISS_THROW_IF_NOT(!use_2layer);
|
|
320
|
+
IndexIVFScalarQuantizer* index_ivf =
|
|
321
|
+
new IndexIVFScalarQuantizer(
|
|
322
|
+
coarse_quantizer, d, ncentroids, qt, metric);
|
|
281
323
|
index_ivf->quantizer_trains_alone =
|
|
282
|
-
|
|
283
|
-
del_coarse_quantizer.release
|
|
324
|
+
get_trains_alone(coarse_quantizer);
|
|
325
|
+
del_coarse_quantizer.release();
|
|
284
326
|
index_ivf->own_fields = true;
|
|
285
327
|
index_1 = index_ivf;
|
|
286
328
|
} else if (hnsw_M > 0) {
|
|
287
329
|
index_1 = new IndexHNSWSQ(d, qt, hnsw_M, metric);
|
|
288
330
|
} else {
|
|
289
|
-
index_1 = new IndexScalarQuantizer
|
|
331
|
+
index_1 = new IndexScalarQuantizer(d, qt, metric);
|
|
290
332
|
}
|
|
291
|
-
} else if (!index && sscanf
|
|
292
|
-
FAISS_THROW_IF_NOT_MSG(
|
|
293
|
-
|
|
294
|
-
FAISS_THROW_IF_NOT_MSG(
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
333
|
+
} else if (!index && sscanf(tok, "PQ%d+%d", &M, &M2) == 2) {
|
|
334
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
335
|
+
coarse_quantizer, "PQ with + works only with an IVF");
|
|
336
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
337
|
+
metric == METRIC_L2,
|
|
338
|
+
"IVFPQR not implemented for inner product search");
|
|
339
|
+
IndexIVFPQR* index_ivf = new IndexIVFPQR(
|
|
340
|
+
coarse_quantizer, d, ncentroids, M, 8, M2, 8);
|
|
298
341
|
index_ivf->quantizer_trains_alone =
|
|
299
|
-
get_trains_alone
|
|
300
|
-
del_coarse_quantizer.release
|
|
342
|
+
get_trains_alone(coarse_quantizer);
|
|
343
|
+
del_coarse_quantizer.release();
|
|
301
344
|
index_ivf->own_fields = true;
|
|
302
345
|
index_1 = index_ivf;
|
|
303
|
-
} else if (
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
346
|
+
} else if (
|
|
347
|
+
!index &&
|
|
348
|
+
(sscanf(tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
|
|
349
|
+
(sscanf(tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
|
|
350
|
+
(sscanf(tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
|
|
307
351
|
if (bbs == -1) {
|
|
308
352
|
bbs = 32;
|
|
309
353
|
}
|
|
310
354
|
bool by_residual = str_ends_with(stok, "fsr");
|
|
311
355
|
if (coarse_quantizer) {
|
|
312
|
-
IndexIVFPQFastScan
|
|
313
|
-
|
|
314
|
-
);
|
|
356
|
+
IndexIVFPQFastScan* index_ivf = new IndexIVFPQFastScan(
|
|
357
|
+
coarse_quantizer, d, ncentroids, M, 4, metric, bbs);
|
|
315
358
|
index_ivf->quantizer_trains_alone =
|
|
316
|
-
|
|
359
|
+
get_trains_alone(coarse_quantizer);
|
|
317
360
|
index_ivf->metric_type = metric;
|
|
318
361
|
index_ivf->by_residual = by_residual;
|
|
319
362
|
index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
|
|
320
|
-
del_coarse_quantizer.release
|
|
363
|
+
del_coarse_quantizer.release();
|
|
321
364
|
index_ivf->own_fields = true;
|
|
322
365
|
index_1 = index_ivf;
|
|
323
366
|
} else {
|
|
324
|
-
IndexPQFastScan
|
|
325
|
-
|
|
326
|
-
);
|
|
367
|
+
IndexPQFastScan* index_pq =
|
|
368
|
+
new IndexPQFastScan(d, M, 4, metric, bbs);
|
|
327
369
|
index_1 = index_pq;
|
|
328
370
|
}
|
|
329
|
-
} else if (
|
|
330
|
-
|
|
331
|
-
|
|
371
|
+
} else if (
|
|
372
|
+
!index &&
|
|
373
|
+
(sscanf(tok, "PQ%dx%d", &M, &nbit) == 2 ||
|
|
374
|
+
sscanf(tok, "PQ%d", &M) == 1 ||
|
|
375
|
+
sscanf(tok, "PQ%dnp", &M) == 1)) {
|
|
332
376
|
bool do_polysemous_training = stok.find("np") == std::string::npos;
|
|
333
377
|
if (coarse_quantizer) {
|
|
334
378
|
if (!use_2layer) {
|
|
335
|
-
IndexIVFPQ
|
|
336
|
-
|
|
379
|
+
IndexIVFPQ* index_ivf = new IndexIVFPQ(
|
|
380
|
+
coarse_quantizer, d, ncentroids, M, nbit);
|
|
337
381
|
index_ivf->quantizer_trains_alone =
|
|
338
|
-
|
|
382
|
+
get_trains_alone(coarse_quantizer);
|
|
339
383
|
index_ivf->metric_type = metric;
|
|
340
384
|
index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
|
|
341
|
-
del_coarse_quantizer.release
|
|
385
|
+
del_coarse_quantizer.release();
|
|
342
386
|
index_ivf->own_fields = true;
|
|
343
387
|
index_ivf->do_polysemous_training = do_polysemous_training;
|
|
344
388
|
index_1 = index_ivf;
|
|
345
389
|
} else {
|
|
346
|
-
Index2Layer
|
|
347
|
-
|
|
390
|
+
Index2Layer* index_2l = new Index2Layer(
|
|
391
|
+
coarse_quantizer, ncentroids, M, nbit);
|
|
348
392
|
index_2l->q1.quantizer_trains_alone =
|
|
349
|
-
|
|
393
|
+
get_trains_alone(coarse_quantizer);
|
|
350
394
|
index_2l->q1.own_fields = true;
|
|
351
395
|
index_1 = index_2l;
|
|
352
396
|
}
|
|
353
397
|
} else if (hnsw_M > 0) {
|
|
354
|
-
IndexHNSWPQ
|
|
398
|
+
IndexHNSWPQ* ipq = new IndexHNSWPQ(d, M, hnsw_M);
|
|
355
399
|
dynamic_cast<IndexPQ*>(ipq->storage)->do_polysemous_training =
|
|
356
|
-
|
|
400
|
+
do_polysemous_training;
|
|
357
401
|
index_1 = ipq;
|
|
358
402
|
} else {
|
|
359
|
-
IndexPQ
|
|
403
|
+
IndexPQ* index_pq = new IndexPQ(d, M, nbit, metric);
|
|
360
404
|
index_pq->do_polysemous_training = do_polysemous_training;
|
|
361
405
|
index_1 = index_pq;
|
|
362
406
|
}
|
|
363
|
-
} else if (
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
407
|
+
} else if (
|
|
408
|
+
!index &&
|
|
409
|
+
sscanf(tok, "HNSW%d_%d+PQ%d", &M, &ncent, &pq_m) == 3) {
|
|
410
|
+
Index* quant = new IndexFlatL2(d);
|
|
411
|
+
IndexHNSW2Level* hidx2l =
|
|
412
|
+
new IndexHNSW2Level(quant, ncent, pq_m, M);
|
|
413
|
+
Index2Layer* idx2l = dynamic_cast<Index2Layer*>(hidx2l->storage);
|
|
368
414
|
idx2l->q1.own_fields = true;
|
|
369
415
|
index_1 = hidx2l;
|
|
370
|
-
} else if (
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
416
|
+
} else if (
|
|
417
|
+
!index &&
|
|
418
|
+
sscanf(tok, "HNSW%d_2x%d+PQ%d", &M, &nbit, &pq_m) == 3) {
|
|
419
|
+
Index* quant = new MultiIndexQuantizer(d, 2, nbit);
|
|
420
|
+
IndexHNSW2Level* hidx2l =
|
|
421
|
+
new IndexHNSW2Level(quant, 1 << (2 * nbit), pq_m, M);
|
|
422
|
+
Index2Layer* idx2l = dynamic_cast<Index2Layer*>(hidx2l->storage);
|
|
376
423
|
idx2l->q1.own_fields = true;
|
|
377
424
|
idx2l->q1.quantizer_trains_alone = 1;
|
|
378
425
|
index_1 = hidx2l;
|
|
379
|
-
} else if (!index &&
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
} else if (!index &&
|
|
387
|
-
sscanf (tok, "HNSW%d", &M) == 1) {
|
|
426
|
+
} else if (!index && sscanf(tok, "HNSW%d_PQ%d", &M, &pq_m) == 2) {
|
|
427
|
+
index_1 = new IndexHNSWPQ(d, pq_m, M);
|
|
428
|
+
} else if (
|
|
429
|
+
!index && sscanf(tok, "HNSW%d_SQ%d", &M, &pq_m) == 2 &&
|
|
430
|
+
pq_m == 8) {
|
|
431
|
+
index_1 = new IndexHNSWSQ(d, ScalarQuantizer::QT_8bit, M);
|
|
432
|
+
} else if (!index && sscanf(tok, "HNSW%d", &M) == 1) {
|
|
388
433
|
hnsw_M = M;
|
|
389
434
|
// here it is unclear what we want: HNSW flat or HNSWx,Y ?
|
|
390
|
-
} else if (!index && (
|
|
391
|
-
|
|
435
|
+
} else if (!index && sscanf(tok, "NSG%d", &R) == 1) {
|
|
436
|
+
nsg_R = R;
|
|
437
|
+
} else if (
|
|
438
|
+
!index &&
|
|
439
|
+
(stok == "LSH" || stok == "LSHr" || stok == "LSHrt" ||
|
|
440
|
+
stok == "LSHt")) {
|
|
392
441
|
bool rotate_data = strstr(tok, "r") != nullptr;
|
|
393
442
|
bool train_thresholds = strstr(tok, "t") != nullptr;
|
|
394
|
-
index_1 = new IndexLSH
|
|
395
|
-
} else if (
|
|
396
|
-
|
|
443
|
+
index_1 = new IndexLSH(d, d, rotate_data, train_thresholds);
|
|
444
|
+
} else if (
|
|
445
|
+
!index &&
|
|
446
|
+
sscanf(tok, "ZnLattice%dx%d_%d", &M, &r2, &nbit) == 3) {
|
|
397
447
|
FAISS_THROW_IF_NOT(!coarse_quantizer);
|
|
398
448
|
index_1 = new IndexLattice(d, M, nbit, r2);
|
|
399
449
|
} else if (stok == "RFlat") {
|
|
@@ -401,67 +451,73 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
|
401
451
|
} else if (stok == "Refine") {
|
|
402
452
|
FAISS_THROW_IF_NOT_MSG(
|
|
403
453
|
!parenthesis_refine.empty(),
|
|
404
|
-
"Refine index should be provided in parentheses"
|
|
405
|
-
);
|
|
454
|
+
"Refine index should be provided in parentheses");
|
|
406
455
|
} else {
|
|
407
|
-
FAISS_THROW_FMT(
|
|
408
|
-
|
|
456
|
+
FAISS_THROW_FMT(
|
|
457
|
+
"could not parse token \"%s\" in %s\n",
|
|
458
|
+
tok,
|
|
459
|
+
description_in);
|
|
409
460
|
}
|
|
410
461
|
|
|
411
462
|
if (index_1 && add_idmap) {
|
|
412
|
-
IndexIDMap
|
|
413
|
-
del_index.set
|
|
463
|
+
IndexIDMap* idmap = new IndexIDMap(index_1);
|
|
464
|
+
del_index.set(idmap);
|
|
414
465
|
idmap->own_fields = true;
|
|
415
466
|
index_1 = idmap;
|
|
416
467
|
add_idmap = false;
|
|
417
468
|
}
|
|
418
469
|
|
|
419
|
-
if (vt_1)
|
|
420
|
-
vts.chain.push_back
|
|
470
|
+
if (vt_1) {
|
|
471
|
+
vts.chain.push_back(vt_1);
|
|
421
472
|
}
|
|
422
473
|
|
|
423
474
|
if (coarse_quantizer_1) {
|
|
424
475
|
coarse_quantizer = coarse_quantizer_1;
|
|
425
|
-
del_coarse_quantizer.set
|
|
476
|
+
del_coarse_quantizer.set(coarse_quantizer);
|
|
426
477
|
}
|
|
427
478
|
|
|
428
479
|
if (index_1) {
|
|
429
480
|
index = index_1;
|
|
430
|
-
del_index.set
|
|
481
|
+
del_index.set(index);
|
|
431
482
|
}
|
|
432
483
|
}
|
|
433
484
|
|
|
434
485
|
if (!index && hnsw_M > 0) {
|
|
435
|
-
index = new IndexHNSWFlat
|
|
436
|
-
del_index.set
|
|
486
|
+
index = new IndexHNSWFlat(d, hnsw_M, metric);
|
|
487
|
+
del_index.set(index);
|
|
488
|
+
} else if (!index && nsg_R > 0) {
|
|
489
|
+
index = new IndexNSGFlat(d, nsg_R, metric);
|
|
490
|
+
del_index.set(index);
|
|
437
491
|
}
|
|
438
492
|
|
|
439
|
-
FAISS_THROW_IF_NOT_FMT(
|
|
440
|
-
|
|
493
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
494
|
+
index, "description %s did not generate an index", description_in);
|
|
441
495
|
|
|
442
496
|
// nothing can go wrong now
|
|
443
|
-
del_index.release
|
|
444
|
-
del_coarse_quantizer.release
|
|
497
|
+
del_index.release();
|
|
498
|
+
del_coarse_quantizer.release();
|
|
445
499
|
|
|
446
500
|
if (add_idmap) {
|
|
447
|
-
fprintf(stderr,
|
|
501
|
+
fprintf(stderr,
|
|
502
|
+
"index_factory: WARNING: "
|
|
448
503
|
"IDMap option not used\n");
|
|
449
504
|
}
|
|
450
505
|
|
|
451
506
|
if (vts.chain.size() > 0) {
|
|
452
|
-
IndexPreTransform
|
|
507
|
+
IndexPreTransform* index_pt = new IndexPreTransform(index);
|
|
453
508
|
index_pt->own_fields = true;
|
|
454
509
|
// add from back
|
|
455
510
|
while (vts.chain.size() > 0) {
|
|
456
|
-
index_pt->prepend_transform
|
|
457
|
-
vts.chain.pop_back
|
|
511
|
+
index_pt->prepend_transform(vts.chain.back());
|
|
512
|
+
vts.chain.pop_back();
|
|
458
513
|
}
|
|
459
514
|
index = index_pt;
|
|
460
515
|
}
|
|
461
516
|
|
|
462
517
|
if (!parenthesis_refine.empty()) {
|
|
463
|
-
Index
|
|
464
|
-
|
|
518
|
+
Index* refine_index =
|
|
519
|
+
index_factory(d_in, parenthesis_refine.c_str(), metric);
|
|
520
|
+
IndexRefine* index_rf = new IndexRefine(index, refine_index);
|
|
465
521
|
index_rf->own_refine_index = true;
|
|
466
522
|
index_rf->own_fields = true;
|
|
467
523
|
index = index_rf;
|
|
@@ -470,48 +526,43 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
|
|
|
470
526
|
return index;
|
|
471
527
|
}
|
|
472
528
|
|
|
473
|
-
IndexBinary
|
|
474
|
-
|
|
475
|
-
IndexBinary *index = nullptr;
|
|
529
|
+
IndexBinary* index_binary_factory(int d, const char* description) {
|
|
530
|
+
IndexBinary* index = nullptr;
|
|
476
531
|
|
|
477
532
|
int ncentroids = -1;
|
|
478
533
|
int M, nhash, b;
|
|
479
534
|
|
|
480
535
|
if (sscanf(description, "BIVF%d_HNSW%d", &ncentroids, &M) == 2) {
|
|
481
|
-
IndexBinaryIVF
|
|
482
|
-
|
|
483
|
-
);
|
|
536
|
+
IndexBinaryIVF* index_ivf =
|
|
537
|
+
new IndexBinaryIVF(new IndexBinaryHNSW(d, M), d, ncentroids);
|
|
484
538
|
index_ivf->own_fields = true;
|
|
485
539
|
index = index_ivf;
|
|
486
540
|
|
|
487
541
|
} else if (sscanf(description, "BIVF%d", &ncentroids) == 1) {
|
|
488
|
-
IndexBinaryIVF
|
|
489
|
-
|
|
490
|
-
);
|
|
542
|
+
IndexBinaryIVF* index_ivf =
|
|
543
|
+
new IndexBinaryIVF(new IndexBinaryFlat(d), d, ncentroids);
|
|
491
544
|
index_ivf->own_fields = true;
|
|
492
545
|
index = index_ivf;
|
|
493
546
|
|
|
494
547
|
} else if (sscanf(description, "BHNSW%d", &M) == 1) {
|
|
495
|
-
IndexBinaryHNSW
|
|
548
|
+
IndexBinaryHNSW* index_hnsw = new IndexBinaryHNSW(d, M);
|
|
496
549
|
index = index_hnsw;
|
|
497
550
|
|
|
498
551
|
} else if (sscanf(description, "BHash%dx%d", &nhash, &b) == 2) {
|
|
499
|
-
index = new IndexBinaryMultiHash
|
|
552
|
+
index = new IndexBinaryMultiHash(d, nhash, b);
|
|
500
553
|
|
|
501
554
|
} else if (sscanf(description, "BHash%d", &b) == 1) {
|
|
502
|
-
index = new IndexBinaryHash
|
|
555
|
+
index = new IndexBinaryHash(d, b);
|
|
503
556
|
|
|
504
557
|
} else if (std::string(description) == "BFlat") {
|
|
505
558
|
index = new IndexBinaryFlat(d);
|
|
506
559
|
|
|
507
560
|
} else {
|
|
508
|
-
FAISS_THROW_IF_NOT_FMT(
|
|
509
|
-
|
|
561
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
562
|
+
index, "description %s did not generate an index", description);
|
|
510
563
|
}
|
|
511
564
|
|
|
512
565
|
return index;
|
|
513
566
|
}
|
|
514
567
|
|
|
515
|
-
|
|
516
|
-
|
|
517
568
|
} // namespace faiss
|