faiss 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +1 -1
- data/README.md +7 -7
- data/ext/faiss/extconf.rb +6 -3
- data/ext/faiss/numo.hpp +4 -4
- data/ext/faiss/utils.cpp +1 -1
- data/ext/faiss/utils.h +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +292 -291
- data/vendor/faiss/faiss/AutoTune.h +55 -56
- data/vendor/faiss/faiss/Clustering.cpp +365 -194
- data/vendor/faiss/faiss/Clustering.h +102 -35
- data/vendor/faiss/faiss/IVFlib.cpp +171 -195
- data/vendor/faiss/faiss/IVFlib.h +48 -51
- data/vendor/faiss/faiss/Index.cpp +85 -103
- data/vendor/faiss/faiss/Index.h +54 -48
- data/vendor/faiss/faiss/Index2Layer.cpp +126 -224
- data/vendor/faiss/faiss/Index2Layer.h +22 -36
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +195 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +45 -37
- data/vendor/faiss/faiss/IndexBinary.h +140 -132
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +73 -53
- data/vendor/faiss/faiss/IndexBinaryFlat.h +29 -24
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +46 -43
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +16 -15
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +215 -232
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +25 -24
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +182 -177
- data/vendor/faiss/faiss/IndexBinaryHash.h +41 -34
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +489 -461
- data/vendor/faiss/faiss/IndexBinaryIVF.h +97 -68
- data/vendor/faiss/faiss/IndexFlat.cpp +115 -176
- data/vendor/faiss/faiss/IndexFlat.h +42 -59
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +372 -348
- data/vendor/faiss/faiss/IndexHNSW.h +57 -41
- data/vendor/faiss/faiss/IndexIVF.cpp +545 -453
- data/vendor/faiss/faiss/IndexIVF.h +169 -118
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +247 -252
- data/vendor/faiss/faiss/IndexIVFFlat.h +48 -51
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +459 -517
- data/vendor/faiss/faiss/IndexIVFPQ.h +75 -67
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +406 -372
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +82 -57
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +104 -102
- data/vendor/faiss/faiss/IndexIVFPQR.h +33 -28
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +163 -150
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +38 -25
- data/vendor/faiss/faiss/IndexLSH.cpp +66 -113
- data/vendor/faiss/faiss/IndexLSH.h +20 -38
- data/vendor/faiss/faiss/IndexLattice.cpp +42 -56
- data/vendor/faiss/faiss/IndexLattice.h +11 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +229 -0
- data/vendor/faiss/faiss/IndexNNDescent.h +72 -0
- data/vendor/faiss/faiss/IndexNSG.cpp +301 -0
- data/vendor/faiss/faiss/IndexNSG.h +85 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +387 -495
- data/vendor/faiss/faiss/IndexPQ.h +64 -82
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +143 -170
- data/vendor/faiss/faiss/IndexPQFastScan.h +46 -32
- data/vendor/faiss/faiss/IndexPreTransform.cpp +120 -150
- data/vendor/faiss/faiss/IndexPreTransform.h +33 -36
- data/vendor/faiss/faiss/IndexRefine.cpp +139 -127
- data/vendor/faiss/faiss/IndexRefine.h +32 -23
- data/vendor/faiss/faiss/IndexReplicas.cpp +147 -153
- data/vendor/faiss/faiss/IndexReplicas.h +62 -56
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +111 -172
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +41 -59
- data/vendor/faiss/faiss/IndexShards.cpp +256 -240
- data/vendor/faiss/faiss/IndexShards.h +85 -73
- data/vendor/faiss/faiss/MatrixStats.cpp +112 -97
- data/vendor/faiss/faiss/MatrixStats.h +7 -10
- data/vendor/faiss/faiss/MetaIndexes.cpp +135 -157
- data/vendor/faiss/faiss/MetaIndexes.h +40 -34
- data/vendor/faiss/faiss/MetricType.h +7 -7
- data/vendor/faiss/faiss/VectorTransform.cpp +654 -475
- data/vendor/faiss/faiss/VectorTransform.h +64 -89
- data/vendor/faiss/faiss/clone_index.cpp +78 -73
- data/vendor/faiss/faiss/clone_index.h +4 -9
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +33 -38
- data/vendor/faiss/faiss/gpu/GpuAutoTune.h +11 -9
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +198 -171
- data/vendor/faiss/faiss/gpu/GpuCloner.h +53 -35
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +12 -14
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +27 -25
- data/vendor/faiss/faiss/gpu/GpuDistance.h +116 -112
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -2
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +134 -137
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +76 -73
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +173 -162
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +67 -64
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +89 -86
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +150 -141
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +101 -103
- data/vendor/faiss/faiss/gpu/GpuIndicesOptions.h +17 -16
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +116 -128
- data/vendor/faiss/faiss/gpu/GpuResources.h +182 -186
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +433 -422
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +131 -130
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +468 -456
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +25 -19
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +22 -20
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +9 -8
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +39 -44
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +16 -14
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +77 -71
- data/vendor/faiss/faiss/gpu/perf/PerfIVFPQAdd.cpp +109 -88
- data/vendor/faiss/faiss/gpu/perf/WriteIndex.cpp +75 -64
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +230 -215
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +80 -86
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +284 -277
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +416 -416
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +611 -517
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +166 -164
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +61 -53
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +274 -238
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +73 -57
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +47 -50
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +79 -72
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +140 -146
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.h +69 -71
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +21 -16
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +25 -29
- data/vendor/faiss/faiss/gpu/utils/Timer.h +30 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +503 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +175 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +90 -120
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +81 -65
- data/vendor/faiss/faiss/impl/FaissAssert.h +73 -58
- data/vendor/faiss/faiss/impl/FaissException.cpp +56 -48
- data/vendor/faiss/faiss/impl/FaissException.h +41 -29
- data/vendor/faiss/faiss/impl/HNSW.cpp +606 -617
- data/vendor/faiss/faiss/impl/HNSW.h +179 -200
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +855 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +244 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +487 -0
- data/vendor/faiss/faiss/impl/NNDescent.h +154 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +679 -0
- data/vendor/faiss/faiss/impl/NSG.h +199 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +484 -454
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +52 -55
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +26 -47
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +469 -459
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +76 -87
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +758 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +188 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -132
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +647 -707
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +48 -46
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +129 -131
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +61 -55
- data/vendor/faiss/faiss/impl/index_read.cpp +631 -480
- data/vendor/faiss/faiss/impl/index_write.cpp +547 -407
- data/vendor/faiss/faiss/impl/io.cpp +76 -95
- data/vendor/faiss/faiss/impl/io.h +31 -41
- data/vendor/faiss/faiss/impl/io_macros.h +60 -29
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +137 -186
- data/vendor/faiss/faiss/impl/lattice_Zn.h +40 -51
- data/vendor/faiss/faiss/impl/platform_macros.h +29 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +77 -124
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +39 -48
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +41 -52
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +80 -117
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +109 -137
- data/vendor/faiss/faiss/index_factory.cpp +619 -397
- data/vendor/faiss/faiss/index_factory.h +8 -6
- data/vendor/faiss/faiss/index_io.h +23 -26
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +67 -75
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +22 -24
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +96 -112
- data/vendor/faiss/faiss/invlists/DirectMap.h +29 -33
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +307 -364
- data/vendor/faiss/faiss/invlists/InvertedLists.h +151 -151
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +29 -34
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +17 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +257 -293
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +50 -45
- data/vendor/faiss/faiss/python/python_callbacks.cpp +23 -26
- data/vendor/faiss/faiss/python/python_callbacks.h +9 -16
- data/vendor/faiss/faiss/utils/AlignedTable.h +79 -44
- data/vendor/faiss/faiss/utils/Heap.cpp +40 -48
- data/vendor/faiss/faiss/utils/Heap.h +186 -209
- data/vendor/faiss/faiss/utils/WorkerThread.cpp +67 -76
- data/vendor/faiss/faiss/utils/WorkerThread.h +32 -33
- data/vendor/faiss/faiss/utils/distances.cpp +305 -312
- data/vendor/faiss/faiss/utils/distances.h +170 -122
- data/vendor/faiss/faiss/utils/distances_simd.cpp +498 -508
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +117 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +113 -232
- data/vendor/faiss/faiss/utils/extra_distances.h +30 -29
- data/vendor/faiss/faiss/utils/hamming-inl.h +260 -209
- data/vendor/faiss/faiss/utils/hamming.cpp +375 -469
- data/vendor/faiss/faiss/utils/hamming.h +62 -85
- data/vendor/faiss/faiss/utils/ordered_key_value.h +16 -18
- data/vendor/faiss/faiss/utils/partitioning.cpp +393 -318
- data/vendor/faiss/faiss/utils/partitioning.h +26 -21
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +78 -66
- data/vendor/faiss/faiss/utils/quantize_lut.h +22 -20
- data/vendor/faiss/faiss/utils/random.cpp +39 -63
- data/vendor/faiss/faiss/utils/random.h +13 -16
- data/vendor/faiss/faiss/utils/simdlib.h +4 -2
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +88 -85
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +226 -165
- data/vendor/faiss/faiss/utils/simdlib_neon.h +832 -0
- data/vendor/faiss/faiss/utils/utils.cpp +304 -287
- data/vendor/faiss/faiss/utils/utils.h +54 -49
- metadata +29 -4
|
@@ -14,202 +14,196 @@ namespace faiss {
|
|
|
14
14
|
|
|
15
15
|
template <typename IndexT>
|
|
16
16
|
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(bool threaded)
|
|
17
|
-
|
|
18
|
-
}
|
|
17
|
+
: ThreadedIndex<IndexT>(threaded) {}
|
|
19
18
|
|
|
20
19
|
template <typename IndexT>
|
|
21
20
|
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(idx_t d, bool threaded)
|
|
22
|
-
|
|
23
|
-
}
|
|
21
|
+
: ThreadedIndex<IndexT>(d, threaded) {}
|
|
24
22
|
|
|
25
23
|
template <typename IndexT>
|
|
26
24
|
IndexReplicasTemplate<IndexT>::IndexReplicasTemplate(int d, bool threaded)
|
|
27
|
-
|
|
28
|
-
}
|
|
25
|
+
: ThreadedIndex<IndexT>(d, threaded) {}
|
|
29
26
|
|
|
30
27
|
template <typename IndexT>
|
|
31
|
-
void
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
28
|
+
void IndexReplicasTemplate<IndexT>::onAfterAddIndex(IndexT* index) {
|
|
29
|
+
// Make sure that the parameters are the same for all prior indices, unless
|
|
30
|
+
// we're the first index to be added
|
|
31
|
+
if (this->count() > 0 && this->at(0) != index) {
|
|
32
|
+
auto existing = this->at(0);
|
|
33
|
+
|
|
34
|
+
FAISS_THROW_IF_NOT_FMT(
|
|
35
|
+
index->ntotal == existing->ntotal,
|
|
36
|
+
"IndexReplicas: newly added index does "
|
|
37
|
+
"not have same number of vectors as prior index; "
|
|
38
|
+
"prior index has %" PRId64 " vectors, new index has %" PRId64,
|
|
39
|
+
existing->ntotal,
|
|
40
|
+
index->ntotal);
|
|
41
|
+
|
|
42
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
43
|
+
index->is_trained == existing->is_trained,
|
|
44
|
+
"IndexReplicas: newly added index does "
|
|
45
|
+
"not have same train status as prior index");
|
|
46
|
+
|
|
47
|
+
FAISS_THROW_IF_NOT_MSG(
|
|
48
|
+
index->d == existing->d,
|
|
49
|
+
"IndexReplicas: newly added index does "
|
|
50
|
+
"not have same dimension as prior index");
|
|
51
|
+
} else {
|
|
52
|
+
syncWithSubIndexes();
|
|
53
|
+
}
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
template <typename IndexT>
|
|
57
|
-
void
|
|
58
|
-
|
|
59
|
-
syncWithSubIndexes();
|
|
57
|
+
void IndexReplicasTemplate<IndexT>::onAfterRemoveIndex(IndexT* index) {
|
|
58
|
+
syncWithSubIndexes();
|
|
60
59
|
}
|
|
61
60
|
|
|
62
61
|
template <typename IndexT>
|
|
63
|
-
void
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
printf("end train replica %d\n", i);
|
|
75
|
-
}
|
|
62
|
+
void IndexReplicasTemplate<IndexT>::train(idx_t n, const component_t* x) {
|
|
63
|
+
auto fn = [n, x](int i, IndexT* index) {
|
|
64
|
+
if (index->verbose) {
|
|
65
|
+
printf("begin train replica %d on %" PRId64 " points\n", i, n);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
index->train(n, x);
|
|
69
|
+
|
|
70
|
+
if (index->verbose) {
|
|
71
|
+
printf("end train replica %d\n", i);
|
|
72
|
+
}
|
|
76
73
|
};
|
|
77
74
|
|
|
78
|
-
|
|
79
|
-
|
|
75
|
+
this->runOnIndex(fn);
|
|
76
|
+
syncWithSubIndexes();
|
|
80
77
|
}
|
|
81
78
|
|
|
82
79
|
template <typename IndexT>
|
|
83
|
-
void
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
printf("end add replica %d\n", i);
|
|
95
|
-
}
|
|
80
|
+
void IndexReplicasTemplate<IndexT>::add(idx_t n, const component_t* x) {
|
|
81
|
+
auto fn = [n, x](int i, IndexT* index) {
|
|
82
|
+
if (index->verbose) {
|
|
83
|
+
printf("begin add replica %d on %" PRId64 " points\n", i, n);
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
index->add(n, x);
|
|
87
|
+
|
|
88
|
+
if (index->verbose) {
|
|
89
|
+
printf("end add replica %d\n", i);
|
|
90
|
+
}
|
|
96
91
|
};
|
|
97
92
|
|
|
98
|
-
|
|
99
|
-
|
|
93
|
+
this->runOnIndex(fn);
|
|
94
|
+
syncWithSubIndexes();
|
|
100
95
|
}
|
|
101
96
|
|
|
102
97
|
template <typename IndexT>
|
|
103
|
-
void
|
|
104
|
-
|
|
105
|
-
FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
|
|
98
|
+
void IndexReplicasTemplate<IndexT>::reconstruct(idx_t n, component_t* x) const {
|
|
99
|
+
FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
|
|
106
100
|
|
|
107
|
-
|
|
108
|
-
|
|
101
|
+
// Just pass to the first replica
|
|
102
|
+
this->at(0)->reconstruct(n, x);
|
|
109
103
|
}
|
|
110
104
|
|
|
111
105
|
template <typename IndexT>
|
|
112
|
-
void
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
[queriesPerIndex, componentsPerVec,
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
106
|
+
void IndexReplicasTemplate<IndexT>::search(
|
|
107
|
+
idx_t n,
|
|
108
|
+
const component_t* x,
|
|
109
|
+
idx_t k,
|
|
110
|
+
distance_t* distances,
|
|
111
|
+
idx_t* labels) const {
|
|
112
|
+
FAISS_THROW_IF_NOT(k > 0);
|
|
113
|
+
|
|
114
|
+
FAISS_THROW_IF_NOT_MSG(this->count() > 0, "no replicas in index");
|
|
115
|
+
|
|
116
|
+
if (n == 0) {
|
|
117
|
+
return;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
auto dim = this->d;
|
|
121
|
+
size_t componentsPerVec = sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;
|
|
122
|
+
|
|
123
|
+
// Partition the query by the number of indices we have
|
|
124
|
+
faiss::Index::idx_t queriesPerIndex =
|
|
125
|
+
(faiss::Index::idx_t)(n + this->count() - 1) /
|
|
126
|
+
(faiss::Index::idx_t)this->count();
|
|
127
|
+
FAISS_ASSERT(n / queriesPerIndex <= this->count());
|
|
128
|
+
|
|
129
|
+
auto fn = [queriesPerIndex, componentsPerVec, n, x, k, distances, labels](
|
|
130
|
+
int i, const IndexT* index) {
|
|
131
|
+
faiss::Index::idx_t base = (faiss::Index::idx_t)i * queriesPerIndex;
|
|
132
|
+
|
|
133
|
+
if (base < n) {
|
|
134
|
+
auto numForIndex = std::min(queriesPerIndex, n - base);
|
|
135
|
+
|
|
136
|
+
if (index->verbose) {
|
|
137
|
+
printf("begin search replica %d on %" PRId64 " points\n",
|
|
138
|
+
i,
|
|
139
|
+
numForIndex);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
index->search(
|
|
143
|
+
numForIndex,
|
|
144
|
+
x + base * componentsPerVec,
|
|
145
|
+
k,
|
|
146
|
+
distances + base * k,
|
|
147
|
+
labels + base * k);
|
|
148
|
+
|
|
149
|
+
if (index->verbose) {
|
|
150
|
+
printf("end search replica %d\n", i);
|
|
151
|
+
}
|
|
155
152
|
}
|
|
156
|
-
}
|
|
157
153
|
};
|
|
158
154
|
|
|
159
|
-
|
|
155
|
+
this->runOnIndex(fn);
|
|
160
156
|
}
|
|
161
157
|
|
|
162
158
|
// FIXME: assumes that nothing is currently running on the sub-indexes, which is
|
|
163
159
|
// true with the normal API, but should use the runOnIndex API instead
|
|
164
160
|
template <typename IndexT>
|
|
165
|
-
void
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
}
|
|
161
|
+
void IndexReplicasTemplate<IndexT>::syncWithSubIndexes() {
|
|
162
|
+
if (!this->count()) {
|
|
163
|
+
this->is_trained = false;
|
|
164
|
+
this->ntotal = 0;
|
|
165
|
+
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
auto firstIndex = this->at(0);
|
|
170
|
+
this->metric_type = firstIndex->metric_type;
|
|
171
|
+
this->is_trained = firstIndex->is_trained;
|
|
172
|
+
this->ntotal = firstIndex->ntotal;
|
|
173
|
+
|
|
174
|
+
for (int i = 1; i < this->count(); ++i) {
|
|
175
|
+
auto index = this->at(i);
|
|
176
|
+
FAISS_THROW_IF_NOT(this->metric_type == index->metric_type);
|
|
177
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
|
178
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
|
179
|
+
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
|
180
|
+
}
|
|
186
181
|
}
|
|
187
182
|
|
|
188
183
|
// No metric_type for IndexBinary
|
|
189
184
|
template <>
|
|
190
|
-
void
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
}
|
|
185
|
+
void IndexReplicasTemplate<IndexBinary>::syncWithSubIndexes() {
|
|
186
|
+
if (!this->count()) {
|
|
187
|
+
this->is_trained = false;
|
|
188
|
+
this->ntotal = 0;
|
|
189
|
+
|
|
190
|
+
return;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
auto firstIndex = this->at(0);
|
|
194
|
+
this->is_trained = firstIndex->is_trained;
|
|
195
|
+
this->ntotal = firstIndex->ntotal;
|
|
196
|
+
|
|
197
|
+
for (int i = 1; i < this->count(); ++i) {
|
|
198
|
+
auto index = this->at(i);
|
|
199
|
+
FAISS_THROW_IF_NOT(this->d == index->d);
|
|
200
|
+
FAISS_THROW_IF_NOT(this->is_trained == index->is_trained);
|
|
201
|
+
FAISS_THROW_IF_NOT(this->ntotal == index->ntotal);
|
|
202
|
+
}
|
|
209
203
|
}
|
|
210
204
|
|
|
211
205
|
// explicit instantiations
|
|
212
206
|
template struct IndexReplicasTemplate<Index>;
|
|
213
207
|
template struct IndexReplicasTemplate<IndexBinary>;
|
|
214
208
|
|
|
215
|
-
} // namespace
|
|
209
|
+
} // namespace faiss
|
|
@@ -19,64 +19,70 @@ namespace faiss {
|
|
|
19
19
|
/// Each index is managed by a separate CPU thread.
|
|
20
20
|
template <typename IndexT>
|
|
21
21
|
class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
22
|
+
public:
|
|
23
|
+
using idx_t = typename IndexT::idx_t;
|
|
24
|
+
using component_t = typename IndexT::component_t;
|
|
25
|
+
using distance_t = typename IndexT::distance_t;
|
|
26
|
+
|
|
27
|
+
/// The dimension that all sub-indices must share will be the dimension of
|
|
28
|
+
/// the first sub-index added
|
|
29
|
+
/// @param threaded do we use one thread per sub-index or do queries
|
|
30
|
+
/// sequentially?
|
|
31
|
+
explicit IndexReplicasTemplate(bool threaded = true);
|
|
32
|
+
|
|
33
|
+
/// @param d the dimension that all sub-indices must share
|
|
34
|
+
/// @param threaded do we use one thread per sub index or do queries
|
|
35
|
+
/// sequentially?
|
|
36
|
+
explicit IndexReplicasTemplate(idx_t d, bool threaded = true);
|
|
37
|
+
|
|
38
|
+
/// int version due to the implicit bool conversion ambiguity of int as
|
|
39
|
+
/// dimension
|
|
40
|
+
explicit IndexReplicasTemplate(int d, bool threaded = true);
|
|
41
|
+
|
|
42
|
+
/// Alias for addIndex()
|
|
43
|
+
void add_replica(IndexT* index) {
|
|
44
|
+
this->addIndex(index);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/// Alias for removeIndex()
|
|
48
|
+
void remove_replica(IndexT* index) {
|
|
49
|
+
this->removeIndex(index);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/// faiss::Index API
|
|
53
|
+
/// All indices receive the same call
|
|
54
|
+
void train(idx_t n, const component_t* x) override;
|
|
55
|
+
|
|
56
|
+
/// faiss::Index API
|
|
57
|
+
/// All indices receive the same call
|
|
58
|
+
void add(idx_t n, const component_t* x) override;
|
|
59
|
+
|
|
60
|
+
/// faiss::Index API
|
|
61
|
+
/// Query is partitioned into a slice for each sub-index
|
|
62
|
+
/// split by ceil(n / #indices) for our sub-indices
|
|
63
|
+
void search(
|
|
64
|
+
idx_t n,
|
|
65
|
+
const component_t* x,
|
|
66
|
+
idx_t k,
|
|
67
|
+
distance_t* distances,
|
|
68
|
+
idx_t* labels) const override;
|
|
69
|
+
|
|
70
|
+
/// reconstructs from the first index
|
|
71
|
+
void reconstruct(idx_t, component_t* v) const override;
|
|
72
|
+
|
|
73
|
+
/// Synchronize the top-level index (IndexShards) with data in the
|
|
74
|
+
/// sub-indices
|
|
75
|
+
void syncWithSubIndexes();
|
|
76
|
+
|
|
77
|
+
protected:
|
|
78
|
+
/// Called just after an index is added
|
|
79
|
+
void onAfterAddIndex(IndexT* index) override;
|
|
80
|
+
|
|
81
|
+
/// Called just after an index is removed
|
|
82
|
+
void onAfterRemoveIndex(IndexT* index) override;
|
|
77
83
|
};
|
|
78
84
|
|
|
79
85
|
using IndexReplicas = IndexReplicasTemplate<Index>;
|
|
80
86
|
using IndexBinaryReplicas = IndexReplicasTemplate<IndexBinary>;
|
|
81
87
|
|
|
82
|
-
} // namespace
|
|
88
|
+
} // namespace faiss
|