faiss 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/Clustering.cpp +32 -0
- data/vendor/faiss/faiss/Clustering.h +14 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +24 -93
- data/vendor/faiss/faiss/Index2Layer.h +8 -17
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +610 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +253 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +52 -69
- data/vendor/faiss/faiss/IndexFlat.h +16 -19
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +101 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +59 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +200 -40
- data/vendor/faiss/faiss/IndexIVF.h +59 -22
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +393 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +183 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +43 -26
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +238 -53
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +63 -40
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +23 -7
- data/vendor/faiss/faiss/IndexLSH.cpp +8 -32
- data/vendor/faiss/faiss/IndexLSH.h +4 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -5
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -5
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +108 -120
- data/vendor/faiss/faiss/IndexPQ.h +21 -22
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +36 -4
- data/vendor/faiss/faiss/IndexRefine.h +14 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +28 -43
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +8 -23
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +45 -1
- data/vendor/faiss/faiss/VectorTransform.h +25 -4
- data/vendor/faiss/faiss/clone_index.cpp +26 -3
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -6
- data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +331 -29
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +110 -19
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +133 -32
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +378 -217
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +106 -29
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -4
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +521 -55
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +94 -16
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +108 -191
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +338 -24
- data/vendor/faiss/faiss/impl/index_write.cpp +300 -18
- data/vendor/faiss/faiss/impl/io.cpp +1 -1
- data/vendor/faiss/faiss/impl/io_macros.h +20 -0
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +303 -0
- data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +772 -412
- data/vendor/faiss/faiss/index_factory.h +3 -0
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +384 -58
- data/vendor/faiss/faiss/utils/distances.h +149 -18
- data/vendor/faiss/faiss/utils/distances_simd.cpp +776 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- data/vendor/faiss/faiss/utils/utils.h +1 -1
- metadata +46 -5
- data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
- data/vendor/faiss/faiss/IndexResidual.h +0 -152
|
@@ -29,6 +29,13 @@
|
|
|
29
29
|
#include <faiss/impl/FaissAssert.h>
|
|
30
30
|
|
|
31
31
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
32
|
+
#include <faiss/impl/IDSelector.h>
|
|
33
|
+
|
|
34
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
35
|
+
|
|
36
|
+
#ifdef __AVX2__
|
|
37
|
+
#include <immintrin.h>
|
|
38
|
+
#endif
|
|
32
39
|
|
|
33
40
|
namespace faiss {
|
|
34
41
|
|
|
@@ -254,13 +261,16 @@ void IndexIVFPQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
|
|
|
254
261
|
}
|
|
255
262
|
}
|
|
256
263
|
|
|
264
|
+
// block size used in IndexIVFPQ::add_core_o
|
|
265
|
+
int index_ivfpq_add_core_o_bs = 32768;
|
|
266
|
+
|
|
257
267
|
void IndexIVFPQ::add_core_o(
|
|
258
268
|
idx_t n,
|
|
259
269
|
const float* x,
|
|
260
270
|
const idx_t* xids,
|
|
261
271
|
float* residuals_2,
|
|
262
272
|
const idx_t* precomputed_idx) {
|
|
263
|
-
idx_t bs =
|
|
273
|
+
idx_t bs = index_ivfpq_add_core_o_bs;
|
|
264
274
|
if (n > bs) {
|
|
265
275
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
|
266
276
|
idx_t i1 = std::min(i0 + bs, n);
|
|
@@ -584,7 +594,7 @@ struct QueryTables {
|
|
|
584
594
|
// field specific to query
|
|
585
595
|
const float* qi;
|
|
586
596
|
|
|
587
|
-
// query-specific
|
|
597
|
+
// query-specific initialization
|
|
588
598
|
void init_query(const float* qi) {
|
|
589
599
|
this->qi = qi;
|
|
590
600
|
if (metric_type == METRIC_INNER_PRODUCT)
|
|
@@ -793,10 +803,13 @@ struct QueryTables {
|
|
|
793
803
|
}
|
|
794
804
|
};
|
|
795
805
|
|
|
796
|
-
|
|
806
|
+
// This way of handling the selector is not optimal since all distances
|
|
807
|
+
// are computed even if the id would filter it out.
|
|
808
|
+
template <class C, bool use_sel>
|
|
797
809
|
struct KnnSearchResults {
|
|
798
810
|
idx_t key;
|
|
799
811
|
const idx_t* ids;
|
|
812
|
+
const IDSelector* sel;
|
|
800
813
|
|
|
801
814
|
// heap params
|
|
802
815
|
size_t k;
|
|
@@ -805,6 +818,10 @@ struct KnnSearchResults {
|
|
|
805
818
|
|
|
806
819
|
size_t nup;
|
|
807
820
|
|
|
821
|
+
inline bool skip_entry(idx_t j) {
|
|
822
|
+
return use_sel && !sel->is_member(ids[j]);
|
|
823
|
+
}
|
|
824
|
+
|
|
808
825
|
inline void add(idx_t j, float dis) {
|
|
809
826
|
if (C::cmp(heap_sim[0], dis)) {
|
|
810
827
|
idx_t id = ids ? ids[j] : lo_build(key, j);
|
|
@@ -814,15 +831,20 @@ struct KnnSearchResults {
|
|
|
814
831
|
}
|
|
815
832
|
};
|
|
816
833
|
|
|
817
|
-
template <class C>
|
|
834
|
+
template <class C, bool use_sel>
|
|
818
835
|
struct RangeSearchResults {
|
|
819
836
|
idx_t key;
|
|
820
837
|
const idx_t* ids;
|
|
838
|
+
const IDSelector* sel;
|
|
821
839
|
|
|
822
840
|
// wrapped result structure
|
|
823
841
|
float radius;
|
|
824
842
|
RangeQueryResult& rres;
|
|
825
843
|
|
|
844
|
+
inline bool skip_entry(idx_t j) {
|
|
845
|
+
return use_sel && !sel->is_member(ids[j]);
|
|
846
|
+
}
|
|
847
|
+
|
|
826
848
|
inline void add(idx_t j, float dis) {
|
|
827
849
|
if (C::cmp(radius, dis)) {
|
|
828
850
|
idx_t id = ids ? ids[j] : lo_build(key, j);
|
|
@@ -864,23 +886,152 @@ struct IVFPQScannerT : QueryTables {
|
|
|
864
886
|
* Scanning the codes: simple PQ scan.
|
|
865
887
|
*****************************************************/
|
|
866
888
|
|
|
867
|
-
|
|
889
|
+
#ifdef __AVX2__
|
|
890
|
+
/// Returns the distance to a single code.
|
|
891
|
+
/// General-purpose version.
|
|
892
|
+
template <class SearchResultType, typename T = PQDecoder>
|
|
893
|
+
typename std::enable_if<!(std::is_same<T, PQDecoder8>::value), float>::
|
|
894
|
+
type inline distance_single_code(const uint8_t* code) const {
|
|
895
|
+
PQDecoder decoder(code, pq.nbits);
|
|
896
|
+
|
|
897
|
+
const float* tab = sim_table;
|
|
898
|
+
float result = 0;
|
|
899
|
+
|
|
900
|
+
for (size_t m = 0; m < pq.M; m++) {
|
|
901
|
+
result += tab[decoder.decode()];
|
|
902
|
+
tab += pq.ksub;
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
return result;
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
/// Returns the distance to a single code.
|
|
909
|
+
/// Specialized AVX2 PQDecoder8 version.
|
|
910
|
+
template <class SearchResultType, typename T = PQDecoder>
|
|
911
|
+
typename std::enable_if<(std::is_same<T, PQDecoder8>::value), float>::
|
|
912
|
+
type inline distance_single_code(const uint8_t* code) const {
|
|
913
|
+
float result = 0;
|
|
914
|
+
|
|
915
|
+
size_t m = 0;
|
|
916
|
+
const size_t pqM16 = pq.M / 16;
|
|
917
|
+
|
|
918
|
+
const float* tab = sim_table;
|
|
919
|
+
|
|
920
|
+
if (pqM16 > 0) {
|
|
921
|
+
// process 16 values per loop
|
|
922
|
+
|
|
923
|
+
const __m256i ksub = _mm256_set1_epi32(pq.ksub);
|
|
924
|
+
__m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
925
|
+
offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
|
|
926
|
+
|
|
927
|
+
// accumulators of partial sums
|
|
928
|
+
__m256 partialSum = _mm256_setzero_ps();
|
|
929
|
+
|
|
930
|
+
// loop
|
|
931
|
+
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
932
|
+
// load 16 uint8 values
|
|
933
|
+
const __m128i mm1 =
|
|
934
|
+
_mm_loadu_si128((const __m128i_u*)(code + m));
|
|
935
|
+
{
|
|
936
|
+
// convert uint8 values (low part of __m128i) to int32
|
|
937
|
+
// values
|
|
938
|
+
const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
|
|
939
|
+
|
|
940
|
+
// add offsets
|
|
941
|
+
const __m256i indices_to_read_from =
|
|
942
|
+
_mm256_add_epi32(idx1, offsets_0);
|
|
943
|
+
|
|
944
|
+
// gather 8 values, similar to 8 operations of tab[idx]
|
|
945
|
+
__m256 collected = _mm256_i32gather_ps(
|
|
946
|
+
tab, indices_to_read_from, sizeof(float));
|
|
947
|
+
tab += pq.ksub * 8;
|
|
948
|
+
|
|
949
|
+
// collect partial sums
|
|
950
|
+
partialSum = _mm256_add_ps(partialSum, collected);
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
// move high 8 uint8 to low ones
|
|
954
|
+
const __m128i mm2 =
|
|
955
|
+
_mm_unpackhi_epi64(mm1, _mm_setzero_si128());
|
|
956
|
+
{
|
|
957
|
+
// convert uint8 values (low part of __m128i) to int32
|
|
958
|
+
// values
|
|
959
|
+
const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
|
|
960
|
+
|
|
961
|
+
// add offsets
|
|
962
|
+
const __m256i indices_to_read_from =
|
|
963
|
+
_mm256_add_epi32(idx1, offsets_0);
|
|
964
|
+
|
|
965
|
+
// gather 8 values, similar to 8 operations of tab[idx]
|
|
966
|
+
__m256 collected = _mm256_i32gather_ps(
|
|
967
|
+
tab, indices_to_read_from, sizeof(float));
|
|
968
|
+
tab += pq.ksub * 8;
|
|
969
|
+
|
|
970
|
+
// collect partial sums
|
|
971
|
+
partialSum = _mm256_add_ps(partialSum, collected);
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
// horizontal sum for partialSum
|
|
976
|
+
const __m256 h0 = _mm256_hadd_ps(partialSum, partialSum);
|
|
977
|
+
const __m256 h1 = _mm256_hadd_ps(h0, h0);
|
|
978
|
+
|
|
979
|
+
// extract high and low __m128 regs from __m256
|
|
980
|
+
const __m128 h2 = _mm256_extractf128_ps(h1, 1);
|
|
981
|
+
const __m128 h3 = _mm256_castps256_ps128(h1);
|
|
982
|
+
|
|
983
|
+
// get the final hsum into the lowest element (_mm_add_ss only adds element 0)
|
|
984
|
+
const __m128 h4 = _mm_add_ss(h2, h3);
|
|
985
|
+
|
|
986
|
+
// extract f[0] from __m128
|
|
987
|
+
const float hsum = _mm_cvtss_f32(h4);
|
|
988
|
+
result += hsum;
|
|
989
|
+
}
|
|
990
|
+
|
|
991
|
+
//
|
|
992
|
+
if (m < pq.M) {
|
|
993
|
+
// process leftovers
|
|
994
|
+
PQDecoder decoder(code + m, pq.nbits);
|
|
995
|
+
|
|
996
|
+
for (; m < pq.M; m++) {
|
|
997
|
+
result += tab[decoder.decode()];
|
|
998
|
+
tab += pq.ksub;
|
|
999
|
+
}
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
return result;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
#else
|
|
1006
|
+
/// Returns the distance to a single code.
|
|
1007
|
+
/// General-purpose version.
|
|
1008
|
+
template <class SearchResultType>
|
|
1009
|
+
inline float distance_single_code(const uint8_t* code) const {
|
|
1010
|
+
PQDecoder decoder(code, pq.nbits);
|
|
1011
|
+
|
|
1012
|
+
const float* tab = sim_table;
|
|
1013
|
+
float result = 0;
|
|
1014
|
+
|
|
1015
|
+
for (size_t m = 0; m < pq.M; m++) {
|
|
1016
|
+
result += tab[decoder.decode()];
|
|
1017
|
+
tab += pq.ksub;
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
return result;
|
|
1021
|
+
}
|
|
1022
|
+
#endif
|
|
1023
|
+
|
|
1024
|
+
/// version of the scan where we use precomputed tables.
|
|
868
1025
|
template <class SearchResultType>
|
|
869
1026
|
void scan_list_with_table(
|
|
870
1027
|
size_t ncode,
|
|
871
1028
|
const uint8_t* codes,
|
|
872
1029
|
SearchResultType& res) const {
|
|
873
|
-
for (size_t j = 0; j < ncode; j
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
float dis = dis0;
|
|
877
|
-
const float* tab = sim_table;
|
|
878
|
-
|
|
879
|
-
for (size_t m = 0; m < pq.M; m++) {
|
|
880
|
-
dis += tab[decoder.decode()];
|
|
881
|
-
tab += pq.ksub;
|
|
1030
|
+
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
1031
|
+
if (res.skip_entry(j)) {
|
|
1032
|
+
continue;
|
|
882
1033
|
}
|
|
883
|
-
|
|
1034
|
+
float dis = dis0 + distance_single_code<SearchResultType>(codes);
|
|
884
1035
|
res.add(j, dis);
|
|
885
1036
|
}
|
|
886
1037
|
}
|
|
@@ -892,10 +1043,11 @@ struct IVFPQScannerT : QueryTables {
|
|
|
892
1043
|
size_t ncode,
|
|
893
1044
|
const uint8_t* codes,
|
|
894
1045
|
SearchResultType& res) const {
|
|
895
|
-
for (size_t j = 0; j < ncode; j
|
|
1046
|
+
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
1047
|
+
if (res.skip_entry(j)) {
|
|
1048
|
+
continue;
|
|
1049
|
+
}
|
|
896
1050
|
PQDecoder decoder(codes, pq.nbits);
|
|
897
|
-
codes += pq.code_size;
|
|
898
|
-
|
|
899
1051
|
float dis = dis0;
|
|
900
1052
|
const float* tab = sim_table_2;
|
|
901
1053
|
|
|
@@ -929,9 +1081,11 @@ struct IVFPQScannerT : QueryTables {
|
|
|
929
1081
|
dis0 = 0;
|
|
930
1082
|
}
|
|
931
1083
|
|
|
932
|
-
for (size_t j = 0; j < ncode; j
|
|
1084
|
+
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
1085
|
+
if (res.skip_entry(j)) {
|
|
1086
|
+
continue;
|
|
1087
|
+
}
|
|
933
1088
|
pq.decode(codes, decoded_vec);
|
|
934
|
-
codes += pq.code_size;
|
|
935
1089
|
|
|
936
1090
|
float dis;
|
|
937
1091
|
if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
|
|
@@ -959,24 +1113,20 @@ struct IVFPQScannerT : QueryTables {
|
|
|
959
1113
|
|
|
960
1114
|
HammingComputer hc(q_code.data(), code_size);
|
|
961
1115
|
|
|
962
|
-
for (size_t j = 0; j < ncode; j
|
|
1116
|
+
for (size_t j = 0; j < ncode; j++, codes += code_size) {
|
|
1117
|
+
if (res.skip_entry(j)) {
|
|
1118
|
+
continue;
|
|
1119
|
+
}
|
|
963
1120
|
const uint8_t* b_code = codes;
|
|
964
1121
|
int hd = hc.hamming(b_code);
|
|
965
1122
|
if (hd < ht) {
|
|
966
1123
|
n_hamming_pass++;
|
|
967
|
-
PQDecoder decoder(codes, pq.nbits);
|
|
968
|
-
|
|
969
|
-
float dis = dis0;
|
|
970
|
-
const float* tab = sim_table;
|
|
971
1124
|
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
tab += pq.ksub;
|
|
975
|
-
}
|
|
1125
|
+
float dis =
|
|
1126
|
+
dis0 + distance_single_code<SearchResultType>(codes);
|
|
976
1127
|
|
|
977
1128
|
res.add(j, dis);
|
|
978
1129
|
}
|
|
979
|
-
codes += code_size;
|
|
980
1130
|
}
|
|
981
1131
|
#pragma omp critical
|
|
982
1132
|
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
|
@@ -1010,29 +1160,41 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1010
1160
|
};
|
|
1011
1161
|
|
|
1012
1162
|
/* We put as many parameters as possible in template. Hopefully the
|
|
1013
|
-
* gain in runtime is worth the code bloat.
|
|
1014
|
-
*
|
|
1015
|
-
*
|
|
1016
|
-
*
|
|
1017
|
-
*
|
|
1018
|
-
|
|
1163
|
+
* gain in runtime is worth the code bloat.
|
|
1164
|
+
*
|
|
1165
|
+
* C is the comparator < or >, it is directly related to METRIC_TYPE.
|
|
1166
|
+
*
|
|
1167
|
+
* precompute_mode is how much we precompute (2 = precompute distance tables,
|
|
1168
|
+
* 1 = precompute pointers to distances, 0 = compute distances one by one).
|
|
1169
|
+
* Currently only 2 is supported
|
|
1170
|
+
*
|
|
1171
|
+
* use_sel: store or ignore the IDSelector
|
|
1172
|
+
*/
|
|
1173
|
+
template <MetricType METRIC_TYPE, class C, class PQDecoder, bool use_sel>
|
|
1019
1174
|
struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
1020
1175
|
InvertedListScanner {
|
|
1021
|
-
bool store_pairs;
|
|
1022
1176
|
int precompute_mode;
|
|
1177
|
+
const IDSelector* sel;
|
|
1023
1178
|
|
|
1024
|
-
IVFPQScanner(
|
|
1179
|
+
IVFPQScanner(
|
|
1180
|
+
const IndexIVFPQ& ivfpq,
|
|
1181
|
+
bool store_pairs,
|
|
1182
|
+
int precompute_mode,
|
|
1183
|
+
const IDSelector* sel)
|
|
1025
1184
|
: IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(
|
|
1026
1185
|
ivfpq,
|
|
1027
1186
|
nullptr),
|
|
1028
|
-
|
|
1029
|
-
|
|
1187
|
+
precompute_mode(precompute_mode),
|
|
1188
|
+
sel(sel) {
|
|
1189
|
+
this->store_pairs = store_pairs;
|
|
1190
|
+
}
|
|
1030
1191
|
|
|
1031
1192
|
void set_query(const float* query) override {
|
|
1032
1193
|
this->init_query(query);
|
|
1033
1194
|
}
|
|
1034
1195
|
|
|
1035
1196
|
void set_list(idx_t list_no, float coarse_dis) override {
|
|
1197
|
+
this->list_no = list_no;
|
|
1036
1198
|
this->init_list(list_no, coarse_dis, precompute_mode);
|
|
1037
1199
|
}
|
|
1038
1200
|
|
|
@@ -1056,9 +1218,10 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1056
1218
|
float* heap_sim,
|
|
1057
1219
|
idx_t* heap_ids,
|
|
1058
1220
|
size_t k) const override {
|
|
1059
|
-
KnnSearchResults<C> res = {
|
|
1221
|
+
KnnSearchResults<C, use_sel> res = {
|
|
1060
1222
|
/* key */ this->key,
|
|
1061
1223
|
/* ids */ this->store_pairs ? nullptr : ids,
|
|
1224
|
+
/* sel */ this->sel,
|
|
1062
1225
|
/* k */ k,
|
|
1063
1226
|
/* heap_sim */ heap_sim,
|
|
1064
1227
|
/* heap_ids */ heap_ids,
|
|
@@ -1085,9 +1248,10 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1085
1248
|
const idx_t* ids,
|
|
1086
1249
|
float radius,
|
|
1087
1250
|
RangeQueryResult& rres) const override {
|
|
1088
|
-
RangeSearchResults<C> res = {
|
|
1251
|
+
RangeSearchResults<C, use_sel> res = {
|
|
1089
1252
|
/* key */ this->key,
|
|
1090
1253
|
/* ids */ this->store_pairs ? nullptr : ids,
|
|
1254
|
+
/* sel */ this->sel,
|
|
1091
1255
|
/* radius */ radius,
|
|
1092
1256
|
/* rres */ rres};
|
|
1093
1257
|
|
|
@@ -1106,32 +1270,53 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1106
1270
|
}
|
|
1107
1271
|
};
|
|
1108
1272
|
|
|
1109
|
-
template <class PQDecoder>
|
|
1273
|
+
template <class PQDecoder, bool use_sel>
|
|
1110
1274
|
InvertedListScanner* get_InvertedListScanner1(
|
|
1111
1275
|
const IndexIVFPQ& index,
|
|
1112
|
-
bool store_pairs
|
|
1276
|
+
bool store_pairs,
|
|
1277
|
+
const IDSelector* sel) {
|
|
1113
1278
|
if (index.metric_type == METRIC_INNER_PRODUCT) {
|
|
1114
1279
|
return new IVFPQScanner<
|
|
1115
1280
|
METRIC_INNER_PRODUCT,
|
|
1116
1281
|
CMin<float, idx_t>,
|
|
1117
|
-
PQDecoder
|
|
1282
|
+
PQDecoder,
|
|
1283
|
+
use_sel>(index, store_pairs, 2, sel);
|
|
1118
1284
|
} else if (index.metric_type == METRIC_L2) {
|
|
1119
|
-
return new IVFPQScanner<
|
|
1120
|
-
|
|
1285
|
+
return new IVFPQScanner<
|
|
1286
|
+
METRIC_L2,
|
|
1287
|
+
CMax<float, idx_t>,
|
|
1288
|
+
PQDecoder,
|
|
1289
|
+
use_sel>(index, store_pairs, 2, sel);
|
|
1121
1290
|
}
|
|
1122
1291
|
return nullptr;
|
|
1123
1292
|
}
|
|
1124
1293
|
|
|
1294
|
+
template <bool use_sel>
|
|
1295
|
+
InvertedListScanner* get_InvertedListScanner2(
|
|
1296
|
+
const IndexIVFPQ& index,
|
|
1297
|
+
bool store_pairs,
|
|
1298
|
+
const IDSelector* sel) {
|
|
1299
|
+
if (index.pq.nbits == 8) {
|
|
1300
|
+
return get_InvertedListScanner1<PQDecoder8, use_sel>(
|
|
1301
|
+
index, store_pairs, sel);
|
|
1302
|
+
} else if (index.pq.nbits == 16) {
|
|
1303
|
+
return get_InvertedListScanner1<PQDecoder16, use_sel>(
|
|
1304
|
+
index, store_pairs, sel);
|
|
1305
|
+
} else {
|
|
1306
|
+
return get_InvertedListScanner1<PQDecoderGeneric, use_sel>(
|
|
1307
|
+
index, store_pairs, sel);
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1125
1311
|
} // anonymous namespace
|
|
1126
1312
|
|
|
1127
1313
|
InvertedListScanner* IndexIVFPQ::get_InvertedListScanner(
|
|
1128
|
-
bool store_pairs
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
return get_InvertedListScanner1<PQDecoder16>(*this, store_pairs);
|
|
1314
|
+
bool store_pairs,
|
|
1315
|
+
const IDSelector* sel) const {
|
|
1316
|
+
if (sel) {
|
|
1317
|
+
return get_InvertedListScanner2<true>(*this, store_pairs, sel);
|
|
1133
1318
|
} else {
|
|
1134
|
-
return
|
|
1319
|
+
return get_InvertedListScanner2<false>(*this, store_pairs, sel);
|
|
1135
1320
|
}
|
|
1136
1321
|
return nullptr;
|
|
1137
1322
|
}
|
|
@@ -134,7 +134,8 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
134
134
|
float* x) const;
|
|
135
135
|
|
|
136
136
|
InvertedListScanner* get_InvertedListScanner(
|
|
137
|
-
bool store_pairs
|
|
137
|
+
bool store_pairs,
|
|
138
|
+
const IDSelector* sel) const override;
|
|
138
139
|
|
|
139
140
|
/// build precomputed table
|
|
140
141
|
void precompute_table();
|
|
@@ -142,6 +143,9 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
142
143
|
IndexIVFPQ();
|
|
143
144
|
};
|
|
144
145
|
|
|
146
|
+
// block size used in IndexIVFPQ::add_core_o
|
|
147
|
+
FAISS_API extern int index_ivfpq_add_core_o_bs;
|
|
148
|
+
|
|
145
149
|
/** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
|
|
146
150
|
*
|
|
147
151
|
* @param use_precomputed_table (I/O)
|
|
@@ -150,7 +154,7 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
150
154
|
* < precomputed_tables_max_bytes), set use_precomputed_table on
|
|
151
155
|
* output =1: tables that work for all quantizers (size 256 * nlist * M) =2:
|
|
152
156
|
* specific version for MultiIndexQuantizer (much more compact)
|
|
153
|
-
* @param precomputed_table precomputed table to
|
|
157
|
+
* @param precomputed_table precomputed table to initialize
|
|
154
158
|
*/
|
|
155
159
|
|
|
156
160
|
void initialize_IVFPQ_precomputed_table(
|