faiss 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
- data/vendor/faiss/faiss/Index2Layer.h +6 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
- data/vendor/faiss/faiss/IndexFlat.h +7 -4
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
- data/vendor/faiss/faiss/IndexIVF.h +35 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
- data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
- data/vendor/faiss/faiss/IndexLSH.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
- data/vendor/faiss/faiss/IndexPQ.h +19 -5
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
- data/vendor/faiss/faiss/IndexRefine.h +4 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
- data/vendor/faiss/faiss/VectorTransform.h +22 -4
- data/vendor/faiss/faiss/clone_index.cpp +23 -1
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
- data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +196 -7
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +380 -56
- data/vendor/faiss/faiss/utils/distances.h +113 -15
- data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- metadata +37 -3
|
@@ -29,6 +29,13 @@
|
|
|
29
29
|
#include <faiss/impl/FaissAssert.h>
|
|
30
30
|
|
|
31
31
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
32
|
+
#include <faiss/impl/IDSelector.h>
|
|
33
|
+
|
|
34
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
35
|
+
|
|
36
|
+
#ifdef __AVX2__
|
|
37
|
+
#include <immintrin.h>
|
|
38
|
+
#endif
|
|
32
39
|
|
|
33
40
|
namespace faiss {
|
|
34
41
|
|
|
@@ -254,13 +261,16 @@ void IndexIVFPQ::sa_decode(idx_t n, const uint8_t* codes, float* x) const {
|
|
|
254
261
|
}
|
|
255
262
|
}
|
|
256
263
|
|
|
264
|
+
// block size used in IndexIVFPQ::add_core_o
|
|
265
|
+
int index_ivfpq_add_core_o_bs = 32768;
|
|
266
|
+
|
|
257
267
|
void IndexIVFPQ::add_core_o(
|
|
258
268
|
idx_t n,
|
|
259
269
|
const float* x,
|
|
260
270
|
const idx_t* xids,
|
|
261
271
|
float* residuals_2,
|
|
262
272
|
const idx_t* precomputed_idx) {
|
|
263
|
-
idx_t bs =
|
|
273
|
+
idx_t bs = index_ivfpq_add_core_o_bs;
|
|
264
274
|
if (n > bs) {
|
|
265
275
|
for (idx_t i0 = 0; i0 < n; i0 += bs) {
|
|
266
276
|
idx_t i1 = std::min(i0 + bs, n);
|
|
@@ -793,10 +803,13 @@ struct QueryTables {
|
|
|
793
803
|
}
|
|
794
804
|
};
|
|
795
805
|
|
|
796
|
-
|
|
806
|
+
// This way of handling the selector is not optimal since all distances
|
|
807
|
+
// are computed even if the id would filter it out.
|
|
808
|
+
template <class C, bool use_sel>
|
|
797
809
|
struct KnnSearchResults {
|
|
798
810
|
idx_t key;
|
|
799
811
|
const idx_t* ids;
|
|
812
|
+
const IDSelector* sel;
|
|
800
813
|
|
|
801
814
|
// heap params
|
|
802
815
|
size_t k;
|
|
@@ -805,6 +818,10 @@ struct KnnSearchResults {
|
|
|
805
818
|
|
|
806
819
|
size_t nup;
|
|
807
820
|
|
|
821
|
+
inline bool skip_entry(idx_t j) {
|
|
822
|
+
return use_sel && !sel->is_member(ids[j]);
|
|
823
|
+
}
|
|
824
|
+
|
|
808
825
|
inline void add(idx_t j, float dis) {
|
|
809
826
|
if (C::cmp(heap_sim[0], dis)) {
|
|
810
827
|
idx_t id = ids ? ids[j] : lo_build(key, j);
|
|
@@ -814,15 +831,20 @@ struct KnnSearchResults {
|
|
|
814
831
|
}
|
|
815
832
|
};
|
|
816
833
|
|
|
817
|
-
template <class C>
|
|
834
|
+
template <class C, bool use_sel>
|
|
818
835
|
struct RangeSearchResults {
|
|
819
836
|
idx_t key;
|
|
820
837
|
const idx_t* ids;
|
|
838
|
+
const IDSelector* sel;
|
|
821
839
|
|
|
822
840
|
// wrapped result structure
|
|
823
841
|
float radius;
|
|
824
842
|
RangeQueryResult& rres;
|
|
825
843
|
|
|
844
|
+
inline bool skip_entry(idx_t j) {
|
|
845
|
+
return use_sel && !sel->is_member(ids[j]);
|
|
846
|
+
}
|
|
847
|
+
|
|
826
848
|
inline void add(idx_t j, float dis) {
|
|
827
849
|
if (C::cmp(radius, dis)) {
|
|
828
850
|
idx_t id = ids ? ids[j] : lo_build(key, j);
|
|
@@ -864,23 +886,152 @@ struct IVFPQScannerT : QueryTables {
|
|
|
864
886
|
* Scanning the codes: simple PQ scan.
|
|
865
887
|
*****************************************************/
|
|
866
888
|
|
|
867
|
-
|
|
889
|
+
#ifdef __AVX2__
|
|
890
|
+
/// Returns the distance to a single code.
|
|
891
|
+
/// General-purpose version.
|
|
892
|
+
template <class SearchResultType, typename T = PQDecoder>
|
|
893
|
+
typename std::enable_if<!(std::is_same<T, PQDecoder8>::value), float>::
|
|
894
|
+
type inline distance_single_code(const uint8_t* code) const {
|
|
895
|
+
PQDecoder decoder(code, pq.nbits);
|
|
896
|
+
|
|
897
|
+
const float* tab = sim_table;
|
|
898
|
+
float result = 0;
|
|
899
|
+
|
|
900
|
+
for (size_t m = 0; m < pq.M; m++) {
|
|
901
|
+
result += tab[decoder.decode()];
|
|
902
|
+
tab += pq.ksub;
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
return result;
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
/// Returns the distance to a single code.
|
|
909
|
+
/// Specialized AVX2 PQDecoder8 version.
|
|
910
|
+
template <class SearchResultType, typename T = PQDecoder>
|
|
911
|
+
typename std::enable_if<(std::is_same<T, PQDecoder8>::value), float>::
|
|
912
|
+
type inline distance_single_code(const uint8_t* code) const {
|
|
913
|
+
float result = 0;
|
|
914
|
+
|
|
915
|
+
size_t m = 0;
|
|
916
|
+
const size_t pqM16 = pq.M / 16;
|
|
917
|
+
|
|
918
|
+
const float* tab = sim_table;
|
|
919
|
+
|
|
920
|
+
if (pqM16 > 0) {
|
|
921
|
+
// process 16 values per loop
|
|
922
|
+
|
|
923
|
+
const __m256i ksub = _mm256_set1_epi32(pq.ksub);
|
|
924
|
+
__m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
925
|
+
offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
|
|
926
|
+
|
|
927
|
+
// accumulators of partial sums
|
|
928
|
+
__m256 partialSum = _mm256_setzero_ps();
|
|
929
|
+
|
|
930
|
+
// loop
|
|
931
|
+
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
932
|
+
// load 16 uint8 values
|
|
933
|
+
const __m128i mm1 =
|
|
934
|
+
_mm_loadu_si128((const __m128i_u*)(code + m));
|
|
935
|
+
{
|
|
936
|
+
// convert uint8 values (low part of __m128i) to int32
|
|
937
|
+
// values
|
|
938
|
+
const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
|
|
939
|
+
|
|
940
|
+
// add offsets
|
|
941
|
+
const __m256i indices_to_read_from =
|
|
942
|
+
_mm256_add_epi32(idx1, offsets_0);
|
|
943
|
+
|
|
944
|
+
// gather 8 values, similar to 8 operations of tab[idx]
|
|
945
|
+
__m256 collected = _mm256_i32gather_ps(
|
|
946
|
+
tab, indices_to_read_from, sizeof(float));
|
|
947
|
+
tab += pq.ksub * 8;
|
|
948
|
+
|
|
949
|
+
// collect partial sums
|
|
950
|
+
partialSum = _mm256_add_ps(partialSum, collected);
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
// move high 8 uint8 to low ones
|
|
954
|
+
const __m128i mm2 =
|
|
955
|
+
_mm_unpackhi_epi64(mm1, _mm_setzero_si128());
|
|
956
|
+
{
|
|
957
|
+
// convert uint8 values (low part of __m128i) to int32
|
|
958
|
+
// values
|
|
959
|
+
const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
|
|
960
|
+
|
|
961
|
+
// add offsets
|
|
962
|
+
const __m256i indices_to_read_from =
|
|
963
|
+
_mm256_add_epi32(idx1, offsets_0);
|
|
964
|
+
|
|
965
|
+
// gather 8 values, similar to 8 operations of tab[idx]
|
|
966
|
+
__m256 collected = _mm256_i32gather_ps(
|
|
967
|
+
tab, indices_to_read_from, sizeof(float));
|
|
968
|
+
tab += pq.ksub * 8;
|
|
969
|
+
|
|
970
|
+
// collect partial sums
|
|
971
|
+
partialSum = _mm256_add_ps(partialSum, collected);
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
// horizontal sum for partialSum
|
|
976
|
+
const __m256 h0 = _mm256_hadd_ps(partialSum, partialSum);
|
|
977
|
+
const __m256 h1 = _mm256_hadd_ps(h0, h0);
|
|
978
|
+
|
|
979
|
+
// extract high and low __m128 regs from __m256
|
|
980
|
+
const __m128 h2 = _mm256_extractf128_ps(h1, 1);
|
|
981
|
+
const __m128 h3 = _mm256_castps256_ps128(h1);
|
|
982
|
+
|
|
983
|
+
// get a final hsum into all 4 regs
|
|
984
|
+
const __m128 h4 = _mm_add_ss(h2, h3);
|
|
985
|
+
|
|
986
|
+
// extract f[0] from __m128
|
|
987
|
+
const float hsum = _mm_cvtss_f32(h4);
|
|
988
|
+
result += hsum;
|
|
989
|
+
}
|
|
990
|
+
|
|
991
|
+
//
|
|
992
|
+
if (m < pq.M) {
|
|
993
|
+
// process leftovers
|
|
994
|
+
PQDecoder decoder(code + m, pq.nbits);
|
|
995
|
+
|
|
996
|
+
for (; m < pq.M; m++) {
|
|
997
|
+
result += tab[decoder.decode()];
|
|
998
|
+
tab += pq.ksub;
|
|
999
|
+
}
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
return result;
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
#else
|
|
1006
|
+
/// Returns the distance to a single code.
|
|
1007
|
+
/// General-purpose version.
|
|
1008
|
+
template <class SearchResultType>
|
|
1009
|
+
inline float distance_single_code(const uint8_t* code) const {
|
|
1010
|
+
PQDecoder decoder(code, pq.nbits);
|
|
1011
|
+
|
|
1012
|
+
const float* tab = sim_table;
|
|
1013
|
+
float result = 0;
|
|
1014
|
+
|
|
1015
|
+
for (size_t m = 0; m < pq.M; m++) {
|
|
1016
|
+
result += tab[decoder.decode()];
|
|
1017
|
+
tab += pq.ksub;
|
|
1018
|
+
}
|
|
1019
|
+
|
|
1020
|
+
return result;
|
|
1021
|
+
}
|
|
1022
|
+
#endif
|
|
1023
|
+
|
|
1024
|
+
/// version of the scan where we use precomputed tables.
|
|
868
1025
|
template <class SearchResultType>
|
|
869
1026
|
void scan_list_with_table(
|
|
870
1027
|
size_t ncode,
|
|
871
1028
|
const uint8_t* codes,
|
|
872
1029
|
SearchResultType& res) const {
|
|
873
|
-
for (size_t j = 0; j < ncode; j
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
float dis = dis0;
|
|
877
|
-
const float* tab = sim_table;
|
|
878
|
-
|
|
879
|
-
for (size_t m = 0; m < pq.M; m++) {
|
|
880
|
-
dis += tab[decoder.decode()];
|
|
881
|
-
tab += pq.ksub;
|
|
1030
|
+
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
1031
|
+
if (res.skip_entry(j)) {
|
|
1032
|
+
continue;
|
|
882
1033
|
}
|
|
883
|
-
|
|
1034
|
+
float dis = dis0 + distance_single_code<SearchResultType>(codes);
|
|
884
1035
|
res.add(j, dis);
|
|
885
1036
|
}
|
|
886
1037
|
}
|
|
@@ -892,10 +1043,11 @@ struct IVFPQScannerT : QueryTables {
|
|
|
892
1043
|
size_t ncode,
|
|
893
1044
|
const uint8_t* codes,
|
|
894
1045
|
SearchResultType& res) const {
|
|
895
|
-
for (size_t j = 0; j < ncode; j
|
|
1046
|
+
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
1047
|
+
if (res.skip_entry(j)) {
|
|
1048
|
+
continue;
|
|
1049
|
+
}
|
|
896
1050
|
PQDecoder decoder(codes, pq.nbits);
|
|
897
|
-
codes += pq.code_size;
|
|
898
|
-
|
|
899
1051
|
float dis = dis0;
|
|
900
1052
|
const float* tab = sim_table_2;
|
|
901
1053
|
|
|
@@ -929,9 +1081,11 @@ struct IVFPQScannerT : QueryTables {
|
|
|
929
1081
|
dis0 = 0;
|
|
930
1082
|
}
|
|
931
1083
|
|
|
932
|
-
for (size_t j = 0; j < ncode; j
|
|
1084
|
+
for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
1085
|
+
if (res.skip_entry(j)) {
|
|
1086
|
+
continue;
|
|
1087
|
+
}
|
|
933
1088
|
pq.decode(codes, decoded_vec);
|
|
934
|
-
codes += pq.code_size;
|
|
935
1089
|
|
|
936
1090
|
float dis;
|
|
937
1091
|
if (METRIC_TYPE == METRIC_INNER_PRODUCT) {
|
|
@@ -959,24 +1113,20 @@ struct IVFPQScannerT : QueryTables {
|
|
|
959
1113
|
|
|
960
1114
|
HammingComputer hc(q_code.data(), code_size);
|
|
961
1115
|
|
|
962
|
-
for (size_t j = 0; j < ncode; j
|
|
1116
|
+
for (size_t j = 0; j < ncode; j++, codes += code_size) {
|
|
1117
|
+
if (res.skip_entry(j)) {
|
|
1118
|
+
continue;
|
|
1119
|
+
}
|
|
963
1120
|
const uint8_t* b_code = codes;
|
|
964
1121
|
int hd = hc.hamming(b_code);
|
|
965
1122
|
if (hd < ht) {
|
|
966
1123
|
n_hamming_pass++;
|
|
967
|
-
PQDecoder decoder(codes, pq.nbits);
|
|
968
1124
|
|
|
969
|
-
float dis =
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
for (size_t m = 0; m < pq.M; m++) {
|
|
973
|
-
dis += tab[decoder.decode()];
|
|
974
|
-
tab += pq.ksub;
|
|
975
|
-
}
|
|
1125
|
+
float dis =
|
|
1126
|
+
dis0 + distance_single_code<SearchResultType>(codes);
|
|
976
1127
|
|
|
977
1128
|
res.add(j, dis);
|
|
978
1129
|
}
|
|
979
|
-
codes += code_size;
|
|
980
1130
|
}
|
|
981
1131
|
#pragma omp critical
|
|
982
1132
|
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
|
@@ -1010,21 +1160,32 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1010
1160
|
};
|
|
1011
1161
|
|
|
1012
1162
|
/* We put as many parameters as possible in template. Hopefully the
|
|
1013
|
-
* gain in runtime is worth the code bloat.
|
|
1014
|
-
*
|
|
1015
|
-
*
|
|
1016
|
-
*
|
|
1017
|
-
*
|
|
1018
|
-
|
|
1163
|
+
* gain in runtime is worth the code bloat.
|
|
1164
|
+
*
|
|
1165
|
+
* C is the comparator < or >, it is directly related to METRIC_TYPE.
|
|
1166
|
+
*
|
|
1167
|
+
* precompute_mode is how much we precompute (2 = precompute distance tables,
|
|
1168
|
+
* 1 = precompute pointers to distances, 0 = compute distances one by one).
|
|
1169
|
+
* Currently only 2 is supported
|
|
1170
|
+
*
|
|
1171
|
+
* use_sel: store or ignore the IDSelector
|
|
1172
|
+
*/
|
|
1173
|
+
template <MetricType METRIC_TYPE, class C, class PQDecoder, bool use_sel>
|
|
1019
1174
|
struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
1020
1175
|
InvertedListScanner {
|
|
1021
1176
|
int precompute_mode;
|
|
1177
|
+
const IDSelector* sel;
|
|
1022
1178
|
|
|
1023
|
-
IVFPQScanner(
|
|
1179
|
+
IVFPQScanner(
|
|
1180
|
+
const IndexIVFPQ& ivfpq,
|
|
1181
|
+
bool store_pairs,
|
|
1182
|
+
int precompute_mode,
|
|
1183
|
+
const IDSelector* sel)
|
|
1024
1184
|
: IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>(
|
|
1025
1185
|
ivfpq,
|
|
1026
1186
|
nullptr),
|
|
1027
|
-
precompute_mode(precompute_mode)
|
|
1187
|
+
precompute_mode(precompute_mode),
|
|
1188
|
+
sel(sel) {
|
|
1028
1189
|
this->store_pairs = store_pairs;
|
|
1029
1190
|
}
|
|
1030
1191
|
|
|
@@ -1057,9 +1218,10 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1057
1218
|
float* heap_sim,
|
|
1058
1219
|
idx_t* heap_ids,
|
|
1059
1220
|
size_t k) const override {
|
|
1060
|
-
KnnSearchResults<C> res = {
|
|
1221
|
+
KnnSearchResults<C, use_sel> res = {
|
|
1061
1222
|
/* key */ this->key,
|
|
1062
1223
|
/* ids */ this->store_pairs ? nullptr : ids,
|
|
1224
|
+
/* sel */ this->sel,
|
|
1063
1225
|
/* k */ k,
|
|
1064
1226
|
/* heap_sim */ heap_sim,
|
|
1065
1227
|
/* heap_ids */ heap_ids,
|
|
@@ -1086,9 +1248,10 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1086
1248
|
const idx_t* ids,
|
|
1087
1249
|
float radius,
|
|
1088
1250
|
RangeQueryResult& rres) const override {
|
|
1089
|
-
RangeSearchResults<C> res = {
|
|
1251
|
+
RangeSearchResults<C, use_sel> res = {
|
|
1090
1252
|
/* key */ this->key,
|
|
1091
1253
|
/* ids */ this->store_pairs ? nullptr : ids,
|
|
1254
|
+
/* sel */ this->sel,
|
|
1092
1255
|
/* radius */ radius,
|
|
1093
1256
|
/* rres */ rres};
|
|
1094
1257
|
|
|
@@ -1107,32 +1270,53 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1107
1270
|
}
|
|
1108
1271
|
};
|
|
1109
1272
|
|
|
1110
|
-
template <class PQDecoder>
|
|
1273
|
+
template <class PQDecoder, bool use_sel>
|
|
1111
1274
|
InvertedListScanner* get_InvertedListScanner1(
|
|
1112
1275
|
const IndexIVFPQ& index,
|
|
1113
|
-
bool store_pairs
|
|
1276
|
+
bool store_pairs,
|
|
1277
|
+
const IDSelector* sel) {
|
|
1114
1278
|
if (index.metric_type == METRIC_INNER_PRODUCT) {
|
|
1115
1279
|
return new IVFPQScanner<
|
|
1116
1280
|
METRIC_INNER_PRODUCT,
|
|
1117
1281
|
CMin<float, idx_t>,
|
|
1118
|
-
PQDecoder
|
|
1282
|
+
PQDecoder,
|
|
1283
|
+
use_sel>(index, store_pairs, 2, sel);
|
|
1119
1284
|
} else if (index.metric_type == METRIC_L2) {
|
|
1120
|
-
return new IVFPQScanner<
|
|
1121
|
-
|
|
1285
|
+
return new IVFPQScanner<
|
|
1286
|
+
METRIC_L2,
|
|
1287
|
+
CMax<float, idx_t>,
|
|
1288
|
+
PQDecoder,
|
|
1289
|
+
use_sel>(index, store_pairs, 2, sel);
|
|
1122
1290
|
}
|
|
1123
1291
|
return nullptr;
|
|
1124
1292
|
}
|
|
1125
1293
|
|
|
1294
|
+
template <bool use_sel>
|
|
1295
|
+
InvertedListScanner* get_InvertedListScanner2(
|
|
1296
|
+
const IndexIVFPQ& index,
|
|
1297
|
+
bool store_pairs,
|
|
1298
|
+
const IDSelector* sel) {
|
|
1299
|
+
if (index.pq.nbits == 8) {
|
|
1300
|
+
return get_InvertedListScanner1<PQDecoder8, use_sel>(
|
|
1301
|
+
index, store_pairs, sel);
|
|
1302
|
+
} else if (index.pq.nbits == 16) {
|
|
1303
|
+
return get_InvertedListScanner1<PQDecoder16, use_sel>(
|
|
1304
|
+
index, store_pairs, sel);
|
|
1305
|
+
} else {
|
|
1306
|
+
return get_InvertedListScanner1<PQDecoderGeneric, use_sel>(
|
|
1307
|
+
index, store_pairs, sel);
|
|
1308
|
+
}
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1126
1311
|
} // anonymous namespace
|
|
1127
1312
|
|
|
1128
1313
|
InvertedListScanner* IndexIVFPQ::get_InvertedListScanner(
|
|
1129
|
-
bool store_pairs
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
return get_InvertedListScanner1<PQDecoder16>(*this, store_pairs);
|
|
1314
|
+
bool store_pairs,
|
|
1315
|
+
const IDSelector* sel) const {
|
|
1316
|
+
if (sel) {
|
|
1317
|
+
return get_InvertedListScanner2<true>(*this, store_pairs, sel);
|
|
1134
1318
|
} else {
|
|
1135
|
-
return
|
|
1319
|
+
return get_InvertedListScanner2<false>(*this, store_pairs, sel);
|
|
1136
1320
|
}
|
|
1137
1321
|
return nullptr;
|
|
1138
1322
|
}
|
|
@@ -134,7 +134,8 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
134
134
|
float* x) const;
|
|
135
135
|
|
|
136
136
|
InvertedListScanner* get_InvertedListScanner(
|
|
137
|
-
bool store_pairs
|
|
137
|
+
bool store_pairs,
|
|
138
|
+
const IDSelector* sel) const override;
|
|
138
139
|
|
|
139
140
|
/// build precomputed table
|
|
140
141
|
void precompute_table();
|
|
@@ -142,6 +143,9 @@ struct IndexIVFPQ : IndexIVF {
|
|
|
142
143
|
IndexIVFPQ();
|
|
143
144
|
};
|
|
144
145
|
|
|
146
|
+
// block size used in IndexIVFPQ::add_core_o
|
|
147
|
+
FAISS_API extern int index_ivfpq_add_core_o_bs;
|
|
148
|
+
|
|
145
149
|
/** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
|
|
146
150
|
*
|
|
147
151
|
* @param use_precomputed_table (I/O)
|