faiss 0.2.4 → 0.2.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
- data/vendor/faiss/faiss/Index2Layer.h +6 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
- data/vendor/faiss/faiss/IndexFlat.h +7 -4
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
- data/vendor/faiss/faiss/IndexIVF.h +35 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
- data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
- data/vendor/faiss/faiss/IndexLSH.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
- data/vendor/faiss/faiss/IndexPQ.h +19 -5
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
- data/vendor/faiss/faiss/IndexRefine.h +4 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
- data/vendor/faiss/faiss/VectorTransform.h +22 -4
- data/vendor/faiss/faiss/clone_index.cpp +23 -1
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
- data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +196 -7
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +380 -56
- data/vendor/faiss/faiss/utils/distances.h +113 -15
- data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- metadata +37 -3
data/vendor/faiss/faiss/VectorTransform.h:

@@ -43,19 +43,27 @@ struct VectorTransform {
      */
     virtual void train(idx_t n, const float* x);
 
-    /** apply the
-     * @param
-     * @
+    /** apply the transformation and return the result in an allocated pointer
+     * @param n number of vectors to transform
+     * @param x input vectors, size n * d_in
+     * @return output vectors, size n * d_out
      */
     float* apply(idx_t n, const float* x) const;
 
-
+    /** apply the transformation and return the result in a provided matrix
+     * @param n number of vectors to transform
+     * @param x input vectors, size n * d_in
+     * @param xt output vectors, size n * d_out
+     */
     virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
 
     /// reverse transformation. May not be implemented or may return
     /// approximate result
     virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
 
+    // check that the two transforms are identical (to merge indexes)
+    virtual void check_identical(const VectorTransform& other) const = 0;
+
     virtual ~VectorTransform() {}
 };
 
@@ -100,6 +108,8 @@ struct LinearTransform : VectorTransform {
             int n,
             int d) const;
 
+    void check_identical(const VectorTransform& other) const override;
+
     ~LinearTransform() override {}
 };
 
@@ -207,6 +217,8 @@ struct ITQTransform : VectorTransform {
     void train(idx_t n, const float* x) override;
 
     void apply_noalloc(idx_t n, const float* x, float* xt) const override;
+
+    void check_identical(const VectorTransform& other) const override;
 };
 
 struct ProductQuantizer;
@@ -260,6 +272,8 @@ struct RemapDimensionsTransform : VectorTransform {
     void reverse_transform(idx_t n, const float* xt, float* x) const override;
 
     RemapDimensionsTransform() {}
+
+    void check_identical(const VectorTransform& other) const override;
 };
 
 /** per-vector normalization */
@@ -273,6 +287,8 @@ struct NormalizationTransform : VectorTransform {
 
     /// Identity transform since norm is not revertible
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
+
+    void check_identical(const VectorTransform& other) const override;
 };
 
 /** Subtract the mean of each component from the vectors. */
@@ -290,6 +306,8 @@ struct CenteringTransform : VectorTransform {
 
     /// add the mean
     void reverse_transform(idx_t n, const float* xt, float* x) const override;
+
+    void check_identical(const VectorTransform& other) const override;
 };
 
 } // namespace faiss
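Note on the VectorTransform change above: the restored doc comments spell out the contract of the two entry points, i.e. apply() returns a newly allocated output buffer while apply_noalloc() writes into a caller-provided one. A minimal sketch, assuming a PCAMatrix transform and illustrative dimensions (none of this code is part of the diff):

#include <faiss/VectorTransform.h>
#include <cstdint>
#include <vector>

// Train a 128 -> 32 PCA and run it both ways (sizes are placeholders).
void pca_example(int64_t ntrain, const float* xtrain) {
    faiss::PCAMatrix pca(128, 32);
    pca.train(ntrain, xtrain);

    std::vector<float> x(128);            // one input vector
    float* out = pca.apply(1, x.data());  // apply() allocates; caller frees
    delete[] out;

    std::vector<float> xt(32);            // apply_noalloc() fills our buffer
    pca.apply_noalloc(1, x.data(), xt.data());
}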
data/vendor/faiss/faiss/clone_index.cpp:

@@ -32,6 +32,11 @@
 #include <faiss/MetaIndexes.h>
 #include <faiss/VectorTransform.h>
 
+#include <faiss/impl/LocalSearchQuantizer.h>
+#include <faiss/impl/ProductQuantizer.h>
+#include <faiss/impl/ResidualQuantizer.h>
+#include <faiss/impl/ScalarQuantizer.h>
+
 namespace faiss {
 
 /*************************************************************
@@ -117,7 +122,9 @@ Index* Cloner::clone_Index(const Index* index) {
         return res;
     } else if (
             const IndexIDMap* idmap = dynamic_cast<const IndexIDMap*>(index)) {
-
+        const IndexIDMap2* idmap2 = dynamic_cast<const IndexIDMap2*>(index);
+        IndexIDMap* res =
+                idmap2 ? new IndexIDMap2(*idmap2) : new IndexIDMap(*idmap);
         res->own_fields = true;
         res->index = clone_Index(idmap->index);
         return res;
@@ -137,6 +144,13 @@ Index* Cloner::clone_Index(const Index* index) {
         res->own_fields = true;
         res->storage = clone_Index(insg->storage);
         return res;
+    } else if (
+            const IndexNNDescent* innd =
+                    dynamic_cast<const IndexNNDescent*>(index)) {
+        IndexNNDescent* res = new IndexNNDescent(*innd);
+        res->own_fields = true;
+        res->storage = clone_Index(innd->storage);
+        return res;
     } else if (
             const Index2Layer* i2l = dynamic_cast<const Index2Layer*>(index)) {
         Index2Layer* res = new Index2Layer(*i2l);
@@ -149,4 +163,12 @@ Index* Cloner::clone_Index(const Index* index) {
     return nullptr;
 }
 
+Quantizer* clone_Quantizer(const Quantizer* quant) {
+    TRYCLONE(ResidualQuantizer, quant)
+    TRYCLONE(LocalSearchQuantizer, quant)
+    TRYCLONE(ProductQuantizer, quant)
+    TRYCLONE(ScalarQuantizer, quant)
+    FAISS_THROW_MSG("Did not recognize quantizer to clone");
+}
+
 } // namespace faiss
data/vendor/faiss/faiss/clone_index.h:

@@ -16,6 +16,7 @@ namespace faiss {
 struct Index;
 struct IndexIVF;
 struct VectorTransform;
+struct Quantizer;
 
 /* cloning functions */
 Index* clone_index(const Index*);
@@ -30,4 +31,6 @@ struct Cloner {
     virtual ~Cloner() {}
 };
 
+Quantizer* clone_Quantizer(const Quantizer* quant);
+
 } // namespace faiss
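Note on clone_index above: clone_Quantizer() is a new public entry point that deep-copies any of the four supported quantizer types via the TRYCLONE dispatch. A minimal sketch, assuming a trained ProductQuantizer; the unique_ptr wrapping is just one way to own the result:

#include <faiss/clone_index.h>
#include <faiss/impl/ProductQuantizer.h>
#include <memory>

// Deep-copy a quantizer through the Quantizer base class.
std::unique_ptr<faiss::Quantizer> copy_quantizer(const faiss::ProductQuantizer& pq) {
    return std::unique_ptr<faiss::Quantizer>(faiss::clone_Quantizer(&pq));
}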
data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h (new file):

@@ -0,0 +1,300 @@
+// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+#pragma once
+
+// This file contains a custom fast implementation of faiss::Index::sa_decode()
+// function for the following index families:
+// * IVF256,PQ[1]x8np
+// * Residual[1]x8,PQ[2]x8
+// * IVF[2^9-2^16 bit],PQ[1]x8 (such as IVF1024,PQ16np)
+// * Residual1x[9-16 bit],PQ[1]x8 (such as Residual1x9,PQ8)
+// * PQ[1]x8
+// Additionally, AVX2 and ARM versions support
+// * Residual[1]x8,PQ[2]x10
+// * Residual[1]x8,PQ[2]x16
+// * Residual[1]x10,PQ[2]x10
+// * Residual[1]x10,PQ[2]x16
+// * Residual[1]x16,PQ[2]x10
+// * Residual[1]x16,PQ[2]x16
+// * Residual1x[9-16 bit],PQ[1]x10 (such as Residual1x9,PQ16x10)
+// * * (use with COARSE_BITS=16)
+// * Residual1x[9-16 bit],PQ[1]x16 (such as Residual1x9,PQ16x16)
+// * * (use with COARSE_BITS=16)
+// * PQ[1]x10
+// * PQ[1]x16
+// Unfortunately, currently Faiss does not support something like
+// IVF256,PQ16x10np
+//
+// The goal was to achieve the maximum performance, so the template version it
+// is. The provided index families share the same code for sa_decode.
+//
+// The front-end code provides two high-level structures.
+//
+// First one:
+// {
+//   template <
+//           intptr_t DIM,
+//           intptr_t COARSE_SIZE,
+//           intptr_t FINE_SIZE,
+//           intptr_t COARSE_BITS = 8
+//           intptr_t FINE_BITS = 8>
+//   struct Index2LevelDecoder { /*...*/ };
+// }
+// * DIM is the dimensionality of data
+// * COARSE_SIZE is the dimensionality of the coarse quantizer (IVF, Residual)
+// * FINE_SIZE is the dimensionality of the ProductQuantizer dsq
+// * COARSE_BITS is the number of bits that are needed to represent a coarse
+//   quantizer code.
+// * FINE_BITS is the number of bits that are needed to represent a fine
+//   quantizer code.
+// For example, "IVF256,PQ8np" for 160-dim data translates into
+//   Index2LevelDecoder<160,160,20,8>
+// For example, "Residual4x8,PQ16" for 256-dim data translates into
+//   Index2LevelDecoder<256,64,1,8>
+// For example, "IVF1024,PQ16np" for 256-dim data translates into
+//   Index2LevelDecoder<256,256,16,10>. But as there are only 1 coarse code
+//   element, Index2LevelDecoder<256,256,16,16> can be used as a faster
+//   decoder.
+// For example, "Residual4x10,PQ16x10np" for 256-dim data translates into
+//   Index2LevelDecoder<256,64,16,10,10>
+//
+// Additional supported values for COARSE_BITS and FINE_BITS may be added later.
+//
+// Second one:
+// {
+//   template <
+//           intptr_t DIM,
+//           intptr_t FINE_SIZE,
+//           intptr_t FINE_BITS = 8>
+//   struct IndexPQDecoder { /*...*/ };
+// }
+// * DIM is the dimensionality of data
+// * FINE_SIZE is the dimensionality of the ProductQuantizer dsq
+// * FINE_BITS is the number of bits that are needed to represent a fine
+//   quantizer code.
+// For example, "PQ8np" for 160-dim data translates into
+//   IndexPQDecoder<160,20>
+//
+// Unlike the general purpose version in faiss::Index::sa_decode(),
+// this version provides the following functions (please note that
+// pqCoarseCentroids params are not available for IndexPQDecoder,
+// but the functionality is the same as for Index2LevelDecoder):
+//
+// * ::store(), which is similar to sa_decode(1, input, output),
+//   The method signature is the following:
+// {
+//   void store(
+//           const float* const __restrict pqCoarseCentroids,
+//           const float* const __restrict pqFineCentroids,
+//           const uint8_t* const __restrict code,
+//           float* const __restrict outputStore);
+// }
+//
+// * ::accum(), which is used to create a linear combination
+//   of decoded vectors:
+// {
+//   const faiss::Index* const index;
+//   const uint8_t* const input;
+//   float weight;
+//
+//   std::vector<float> buffer(d, 0);
+//
+//   index->sa_decode(1, input, buffer.data());
+//   for (size_t iDim = 0; iDim < d; iDim++)
+//     output[iDim] += weight * buffer[iDim];
+// }
+//   The method signature is the following:
+// {
+//   static void accum(
+//           const float* const __restrict pqCoarseCentroids,
+//           const float* const __restrict pqFineCentroids,
+//           const uint8_t* const __restrict code,
+//           const float weight,
+//           float* const __restrict outputAccum);
+// }
+//
+// * There is an additional overload for ::accum() that decodes two vectors
+//   per call. This provides an additional speedup because of a CPU
+//   superscalar architecture:
+// {
+//   const faiss::Index* const index;
+//   const uint8_t* const input0;
+//   float weight0;
+//   const uint8_t* const input1;
+//   float weight1;
+//
+//   std::vector<float> buffer(d, 0);
+//
+//   index->sa_decode(1, input0, buffer.data());
+//   for (size_t iDim = 0; iDim < d; iDim++)
+//     output[iDim] += weight0 * buffer[iDim];
+//
+//   index->sa_decode(1, input1, buffer.data());
+//   for (size_t iDim = 0; iDim < d; iDim++)
+//     output[iDim] += weight1 * buffer[iDim];
+// }
+// If each code uses its own coarse quantizer centroids table and its own fine
+// quantizer centroids table, then the following overload can be used:
+// {
+//   static void accum(
+//           const float* const __restrict pqCoarseCentroids0,
+//           const float* const __restrict pqFineCentroids0,
+//           const uint8_t* const __restrict code0,
+//           const float weight0,
+//           const float* const __restrict pqCoarseCentroids1,
+//           const float* const __restrict pqFineCentroids1,
+//           const uint8_t* const __restrict code1,
+//           const float weight1,
+//           float* const __restrict outputAccum);
+// }
+// If codes share the coarse quantizer centroids table and also share
+// the fine quantizer centroids table, then the following overload can be
+// used:
+// {
+//   static void accum(
+//           const float* const __restrict pqCoarseCentroids,
+//           const float* const __restrict pqFineCentroids,
+//           const uint8_t* const __restrict code0,
+//           const float weight0,
+//           const uint8_t* const __restrict code1,
+//           const float weight1,
+//           float* const __restrict outputAccum);
+// }
+//
+// * And one more overload for ::accum() that decodes and accumulates
+//   three vectors per call.
+// {
+//   const faiss::Index* const index;
+//   const uint8_t* const input0;
+//   float weight0;
+//   const uint8_t* const input1;
+//   float weight1;
+//   const uint8_t* const input2;
+//   float weight2;
+//
+//   std::vector<float> buffer(d, 0);
+//
+//   index->sa_decode(1, input0, buffer.data());
+//   for (size_t iDim = 0; iDim < d; iDim++)
+//     output[iDim] += weight0 * buffer[iDim];
+//
+//   index->sa_decode(1, input1, buffer.data());
+//   for (size_t iDim = 0; iDim < d; iDim++)
+//     output[iDim] += weight1 * buffer[iDim];
+//
+//   index->sa_decode(1, input2, buffer.data());
+//   for (size_t iDim = 0; iDim < d; iDim++)
+//     output[iDim] += weight2 * buffer[iDim];
+// }
+//
+// If each code uses its own coarse quantizer centroids table and its own fine
+// quantizer centroids table, then the following overload can be used:
+// {
+//   static void accum(
+//           const float* const __restrict pqCoarseCentroids0,
+//           const float* const __restrict pqFineCentroids0,
+//           const uint8_t* const __restrict code0,
+//           const float weight0,
+//           const float* const __restrict pqCoarseCentroids1,
+//           const float* const __restrict pqFineCentroids1,
+//           const uint8_t* const __restrict code1,
+//           const float weight1,
+//           const float* const __restrict pqCoarseCentroids2,
+//           const float* const __restrict pqFineCentroids2,
+//           const uint8_t* const __restrict code2,
+//           const float weight2,
+//           float* const __restrict outputAccum);
+// }
+// If codes share the coarse quantizer centroids table and also share
+// the fine quantizer centroids table, then the following overload can be
+// used:
+// {
+//   static void accum(
+//           const float* const __restrict pqCoarseCentroids,
+//           const float* const __restrict pqFineCentroids,
+//           const uint8_t* const __restrict code0,
+//           const float weight0,
+//           const uint8_t* const __restrict code1,
+//           const float weight1,
+//           const uint8_t* const __restrict code2,
+//           const float weight2,
+//           float* const __restrict outputAccum);
+// }
+//
+// The provided version is not multithreaded.
+//
+// Currently, an AVX2+FMA implementation is available. AVX512 version is also
+// doable, but it was found to be slower than AVX2 for real world applications
+// that I needed.
+//
+////////////////////////////////////////////////////////////////////////////////////
+//
+// It is possible to use an additional index wrapper on top of IVFPQ /
+// Residual+PQ, known as IndexRowwiseMinMax / IndexRowwiseMinMaxFP16. Index
+// wrapper that performs rowwise normalization to [0,1], preserving the
+// coefficients. This is a vector codec index only.
+// For more details please refer to the description in
+// faiss/IndexRowwiseMinMax.h file.
+//
+// If such a wrapper is used, then the quantizer will look like, say,
+//   MinMaxFP16,IVF256,PQ32np
+// or
+//   MinMax,PQ16np
+// In this case, please use the following contruction for the decoding,
+// basically, wrapping a kernel in a kernel:
+// {
+//   using SubT = faiss::cppcontrib::Index2LevelDecoder<128, 128, 2>;
+//   using T = faiss::cppcontrib::IndexMinMaxFP16Decoder<SubT>;
+//   // do T::store(...) or T::accum(...)
+// }
+//
+// T::accum(...) contains an additional function variable which is
+// used for accumulating scaling. Thus, the code pattern is the following:
+// {
+//   const float* const __restrict pqCoarseCentroidsQ;
+//   const float* const __restrict pqFineCentroidsQ;
+//   const uint8_t* const __restrict input;
+//   const float* const __restrict weights;
+//   float* const __restrict output;
+//   float outputAccumMin = 0;
+//
+//   for (size_t i = 0; i < n; i++) {
+//     T::accum(
+//         pqCoarseCentroidsQ,
+//         pqFineCentroidsQ,
+//         input + i * code_size,
+//         weights[i],
+//         output,
+//         outputAccumMin);
+//   }
+//   for (size_t j = 0; j < d; j++)
+//     output[j] += outputAccumMin;
+// }
+// This is similar to the following regular pseudo-code:
+// {
+//   const faiss::Index* const index;
+//   const uint8_t* const __restrict input;
+//   const float* const __restrict weights;
+//   float* const __restrict output;
+//
+//   for (size_t i = 0; i < n; i++) {
+//     std::vector<float> buffer(d, 0);
+//
+//     index->sa_decode(1, input + i * code_size, buffer.data());
+//     for (size_t j = 0; j < d; j++)
+//       output[j] += weights[i] * buffer[j];
+//   }
+
+#include <faiss/cppcontrib/sa_decode/MinMax-inl.h>
+#include <faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h>
+
+#ifdef __AVX2__
+#include <faiss/cppcontrib/sa_decode/Level2-avx2-inl.h>
+#include <faiss/cppcontrib/sa_decode/PQ-avx2-inl.h>
+#elif defined(__ARM_NEON)
+#include <faiss/cppcontrib/sa_decode/Level2-neon-inl.h>
+#include <faiss/cppcontrib/sa_decode/PQ-neon-inl.h>
+#else
+#include <faiss/cppcontrib/sa_decode/Level2-inl.h>
+#include <faiss/cppcontrib/sa_decode/PQ-inl.h>
+#endif
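Note on SaDecodeKernels.h above: a hedged usage sketch for an "IVF256,PQ16np" index over 128-dimensional vectors. Per the mapping documented in the header this is Index2LevelDecoder<128,128,8> (COARSE_BITS defaults to 8; FINE_SIZE = 128 dims / 16 sub-quantizers). How the centroid tables are pulled out of the index below (IndexFlat::get_xb() for the coarse quantizer, pq.centroids for the PQ) is an assumption about the index internals, not something this header prescribes:

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/cppcontrib/SaDecodeKernels.h>
#include <cstddef>
#include <cstdint>

using Decoder = faiss::cppcontrib::Index2LevelDecoder<128, 128, 8>;

// Accumulate a weighted sum of n sa_encoded vectors into `output`
// (128 floats, assumed zero-initialized by the caller).
void weighted_sum(
        const faiss::IndexIVFPQ& index,
        const uint8_t* codes,   // n codes, index.sa_code_size() bytes each
        const float* weights,
        size_t n,
        float* output) {
    // assumption: the coarse quantizer is an IndexFlat, as produced by
    // index_factory("IVF256,PQ16np")
    const float* coarse =
            static_cast<const faiss::IndexFlat*>(index.quantizer)->get_xb();
    const float* fine = index.pq.centroids.data();
    const size_t code_size = index.sa_code_size();
    for (size_t i = 0; i < n; i++) {
        Decoder::accum(coarse, fine, codes + i * code_size, weights[i], output);
    }
}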
data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h (new file):

@@ -0,0 +1,24 @@
+#pragma once
+
+#include <cstdint>
+
+namespace faiss {
+namespace cppcontrib {
+namespace detail {
+
+template <int COARSE_BITS>
+struct CoarseBitType {};
+
+template <>
+struct CoarseBitType<8> {
+    using bit_type = uint8_t;
+};
+
+template <>
+struct CoarseBitType<16> {
+    using bit_type = uint16_t;
+};
+
+} // namespace detail
+} // namespace cppcontrib
+} // namespace faiss
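Note on CoarseBitType.h above: it is only a bit-width-to-storage-type mapping; a quick compile-time sanity check:

#include <faiss/cppcontrib/detail/CoarseBitType.h>
#include <cstdint>
#include <type_traits>

// 8-bit coarse codes are stored in uint8_t, 16-bit ones in uint16_t.
static_assert(
        std::is_same<faiss::cppcontrib::detail::CoarseBitType<8>::bit_type, uint8_t>::value,
        "8-bit coarse code type");
static_assert(
        std::is_same<faiss::cppcontrib::detail::CoarseBitType<16>::bit_type, uint16_t>::value,
        "16-bit coarse code type");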
data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h (new file):

@@ -0,0 +1,195 @@
+#pragma once
+
+#include <cstdint>
+
+namespace faiss {
+namespace cppcontrib {
+namespace detail {
+
+namespace {
+
+template <intptr_t N_ELEMENTS, intptr_t CPOS>
+struct Uint8Reader {
+    static_assert(CPOS < N_ELEMENTS, "CPOS should be less than N_ELEMENTS");
+
+    static intptr_t get(const uint8_t* const __restrict codes) {
+        // Read using 4-bytes, if possible.
+        // Reading using 8-byte takes too many registers somewhy.
+
+        constexpr intptr_t ELEMENT_TO_READ = CPOS / 4;
+        constexpr intptr_t SUB_ELEMENT = CPOS % 4;
+
+        switch (SUB_ELEMENT) {
+            case 0: {
+                if (N_ELEMENTS > CPOS + 3) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 4);
+                    return (code32 & 0x000000FF);
+                } else {
+                    return codes[CPOS];
+                }
+            }
+            case 1: {
+                if (N_ELEMENTS > CPOS + 2) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 4);
+                    return (code32 & 0x0000FF00) >> 8;
+                } else {
+                    return codes[CPOS];
+                }
+            }
+            case 2: {
+                if (N_ELEMENTS > CPOS + 1) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 4);
+                    return (code32 & 0x00FF0000) >> 16;
+                } else {
+                    return codes[CPOS];
+                }
+            }
+            case 3: {
+                if (N_ELEMENTS > CPOS) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 4);
+                    return (code32) >> 24;
+                } else {
+                    return codes[CPOS];
+                }
+            }
+        }
+    }
+};
+
+// reduces the number of read operations from RAM
+///////////////////////////////////////////////
+// 76543210 76543210 76543210 76543210 76543210
+// 00000000       00
+//          111111       1111
+//                   2222       222222
+//                            33        33333333
+template <intptr_t N_ELEMENTS, intptr_t CPOS>
+struct Uint10Reader {
+    static_assert(CPOS < N_ELEMENTS, "CPOS should be less than N_ELEMENTS");
+
+    static intptr_t get(const uint8_t* const __restrict codes) {
+        // Read using 4-bytes or 2-bytes.
+
+        constexpr intptr_t ELEMENT_TO_READ = CPOS / 4;
+        constexpr intptr_t SUB_ELEMENT = CPOS % 4;
+
+        switch (SUB_ELEMENT) {
+            case 0: {
+                if (N_ELEMENTS > CPOS + 2) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 5);
+                    return (code32 & 0b0000001111111111);
+                } else {
+                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                            codes + ELEMENT_TO_READ * 5 + 0);
+                    return (code16 & 0b0000001111111111);
+                }
+            }
+            case 1: {
+                if (N_ELEMENTS > CPOS + 1) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 5);
+                    return (code32 & 0b000011111111110000000000) >> 10;
+                } else {
+                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                            codes + ELEMENT_TO_READ * 5 + 1);
+                    return (code16 & 0b0000111111111100) >> 2;
+                }
+            }
+            case 2: {
+                if (N_ELEMENTS > CPOS) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 5);
+                    return (code32 & 0b00111111111100000000000000000000) >> 20;
+                } else {
+                    const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                            codes + ELEMENT_TO_READ * 5 + 2);
+                    return (code16 & 0b0011111111110000) >> 4;
+                }
+            }
+            case 3: {
+                const uint16_t code16 = *reinterpret_cast<const uint16_t*>(
+                        codes + ELEMENT_TO_READ * 5 + 3);
+                return (code16 & 0b1111111111000000) >> 6;
+            }
+        }
+    }
+};
+
+// reduces the number of read operations from RAM
+template <intptr_t N_ELEMENTS, intptr_t CPOS>
+struct Uint16Reader {
+    static_assert(CPOS < N_ELEMENTS, "CPOS should be less than N_ELEMENTS");
+
+    static intptr_t get(const uint8_t* const __restrict codes) {
+        // Read using 4-bytes or 2-bytes.
+        // Reading using 8-byte takes too many registers somewhy.
+
+        constexpr intptr_t ELEMENT_TO_READ = CPOS / 2;
+        constexpr intptr_t SUB_ELEMENT = CPOS % 2;
+
+        switch (SUB_ELEMENT) {
+            case 0: {
+                if (N_ELEMENTS > CPOS + 1) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 4);
+                    return (code32 & 0x0000FFFF);
+                } else {
+                    const uint16_t* const __restrict codesFp16 =
+                            reinterpret_cast<const uint16_t*>(codes);
+                    return codesFp16[CPOS];
+                }
+            }
+            case 1: {
+                if (N_ELEMENTS > CPOS) {
+                    const uint32_t code32 = *reinterpret_cast<const uint32_t*>(
+                            codes + ELEMENT_TO_READ * 4);
+                    return code32 >> 16;
+                } else {
+                    const uint16_t* const __restrict codesFp16 =
+                            reinterpret_cast<const uint16_t*>(codes);
+                    return codesFp16[CPOS];
+                }
+            }
+        }
+    }
+};
+
+//
+template <intptr_t N_ELEMENTS, intptr_t CODE_BITS, intptr_t CPOS>
+struct UintReaderImplType {};
+
+template <intptr_t N_ELEMENTS, intptr_t CPOS>
+struct UintReaderImplType<N_ELEMENTS, 8, CPOS> {
+    using reader_type = Uint8Reader<N_ELEMENTS, CPOS>;
+};
+
+template <intptr_t N_ELEMENTS, intptr_t CPOS>
+struct UintReaderImplType<N_ELEMENTS, 10, CPOS> {
+    using reader_type = Uint10Reader<N_ELEMENTS, CPOS>;
+};
+
+template <intptr_t N_ELEMENTS, intptr_t CPOS>
+struct UintReaderImplType<N_ELEMENTS, 16, CPOS> {
+    using reader_type = Uint16Reader<N_ELEMENTS, CPOS>;
+};
+
+} // namespace
+
+// reduces the number of read operations from RAM
+template <intptr_t DIM, intptr_t CODE_SIZE, intptr_t CODE_BITS, intptr_t CPOS>
+using UintReader =
+        typename UintReaderImplType<DIM / CODE_SIZE, CODE_BITS, CPOS>::
+                reader_type;
+
+template <intptr_t N_ELEMENTS, intptr_t CODE_BITS, intptr_t CPOS>
+using UintReaderRaw =
+        typename UintReaderImplType<N_ELEMENTS, CODE_BITS, CPOS>::reader_type;
+
+} // namespace detail
+} // namespace cppcontrib
+} // namespace faiss