faiss 0.2.5 → 0.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/ext/faiss/index.cpp +13 -0
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +30 -4
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -33,9 +33,7 @@
|
|
33
33
|
|
34
34
|
#include <faiss/impl/ProductQuantizer.h>
|
35
35
|
|
36
|
-
#
|
37
|
-
#include <immintrin.h>
|
38
|
-
#endif
|
36
|
+
#include <faiss/impl/code_distance/code_distance.h>
|
39
37
|
|
40
38
|
namespace faiss {
|
41
39
|
|
@@ -51,7 +49,6 @@ IndexIVFPQ::IndexIVFPQ(
|
|
51
49
|
size_t nbits_per_idx,
|
52
50
|
MetricType metric)
|
53
51
|
: IndexIVF(quantizer, d, nlist, 0, metric), pq(d, M, nbits_per_idx) {
|
54
|
-
FAISS_THROW_IF_NOT(nbits_per_idx <= 8);
|
55
52
|
code_size = pq.code_size;
|
56
53
|
invlists->code_size = code_size;
|
57
54
|
is_trained = false;
|
@@ -198,9 +195,9 @@ void IndexIVFPQ::add_core(
|
|
198
195
|
|
199
196
|
static float* compute_residuals(
|
200
197
|
const Index* quantizer,
|
201
|
-
|
198
|
+
idx_t n,
|
202
199
|
const float* x,
|
203
|
-
const
|
200
|
+
const idx_t* list_nos) {
|
204
201
|
size_t d = quantizer->d;
|
205
202
|
float* residuals = new float[n * d];
|
206
203
|
// TODO: parallelize?
|
@@ -423,6 +420,7 @@ void initialize_IVFPQ_precomputed_table(
|
|
423
420
|
const Index* quantizer,
|
424
421
|
const ProductQuantizer& pq,
|
425
422
|
AlignedTable<float>& precomputed_table,
|
423
|
+
bool by_residual,
|
426
424
|
bool verbose) {
|
427
425
|
size_t nlist = quantizer->ntotal;
|
428
426
|
size_t d = quantizer->d;
|
@@ -434,10 +432,10 @@ void initialize_IVFPQ_precomputed_table(
|
|
434
432
|
}
|
435
433
|
|
436
434
|
if (use_precomputed_table == 0) { // then choose the type of table
|
437
|
-
if (quantizer->metric_type ==
|
435
|
+
if (!(quantizer->metric_type == METRIC_L2 && by_residual)) {
|
438
436
|
if (verbose) {
|
439
437
|
printf("IndexIVFPQ::precompute_table: precomputed "
|
440
|
-
"tables
|
438
|
+
"tables needed only for L2 metric and by_residual is enabled\n");
|
441
439
|
}
|
442
440
|
precomputed_table.resize(0);
|
443
441
|
return;
|
@@ -516,13 +514,16 @@ void initialize_IVFPQ_precomputed_table(
|
|
516
514
|
|
517
515
|
void IndexIVFPQ::precompute_table() {
|
518
516
|
initialize_IVFPQ_precomputed_table(
|
519
|
-
use_precomputed_table,
|
517
|
+
use_precomputed_table,
|
518
|
+
quantizer,
|
519
|
+
pq,
|
520
|
+
precomputed_table,
|
521
|
+
by_residual,
|
522
|
+
verbose);
|
520
523
|
}
|
521
524
|
|
522
525
|
namespace {
|
523
526
|
|
524
|
-
using idx_t = Index::idx_t;
|
525
|
-
|
526
527
|
#define TIC t0 = get_cycles()
|
527
528
|
#define TOC get_cycles() - t0
|
528
529
|
|
@@ -623,7 +624,7 @@ struct QueryTables {
|
|
623
624
|
*****************************************************/
|
624
625
|
|
625
626
|
// fields specific to list
|
626
|
-
|
627
|
+
idx_t key;
|
627
628
|
float coarse_dis;
|
628
629
|
std::vector<uint8_t> q_code;
|
629
630
|
|
@@ -886,140 +887,29 @@ struct IVFPQScannerT : QueryTables {
|
|
886
887
|
* Scanning the codes: simple PQ scan.
|
887
888
|
*****************************************************/
|
888
889
|
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
}
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
type inline distance_single_code(const uint8_t* code) const {
|
913
|
-
float result = 0;
|
914
|
-
|
915
|
-
size_t m = 0;
|
916
|
-
const size_t pqM16 = pq.M / 16;
|
917
|
-
|
918
|
-
const float* tab = sim_table;
|
919
|
-
|
920
|
-
if (pqM16 > 0) {
|
921
|
-
// process 16 values per loop
|
922
|
-
|
923
|
-
const __m256i ksub = _mm256_set1_epi32(pq.ksub);
|
924
|
-
__m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
925
|
-
offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
|
926
|
-
|
927
|
-
// accumulators of partial sums
|
928
|
-
__m256 partialSum = _mm256_setzero_ps();
|
929
|
-
|
930
|
-
// loop
|
931
|
-
for (m = 0; m < pqM16 * 16; m += 16) {
|
932
|
-
// load 16 uint8 values
|
933
|
-
const __m128i mm1 =
|
934
|
-
_mm_loadu_si128((const __m128i_u*)(code + m));
|
935
|
-
{
|
936
|
-
// convert uint8 values (low part of __m128i) to int32
|
937
|
-
// values
|
938
|
-
const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
|
939
|
-
|
940
|
-
// add offsets
|
941
|
-
const __m256i indices_to_read_from =
|
942
|
-
_mm256_add_epi32(idx1, offsets_0);
|
943
|
-
|
944
|
-
// gather 8 values, similar to 8 operations of tab[idx]
|
945
|
-
__m256 collected = _mm256_i32gather_ps(
|
946
|
-
tab, indices_to_read_from, sizeof(float));
|
947
|
-
tab += pq.ksub * 8;
|
948
|
-
|
949
|
-
// collect partial sums
|
950
|
-
partialSum = _mm256_add_ps(partialSum, collected);
|
951
|
-
}
|
952
|
-
|
953
|
-
// move high 8 uint8 to low ones
|
954
|
-
const __m128i mm2 =
|
955
|
-
_mm_unpackhi_epi64(mm1, _mm_setzero_si128());
|
956
|
-
{
|
957
|
-
// convert uint8 values (low part of __m128i) to int32
|
958
|
-
// values
|
959
|
-
const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
|
960
|
-
|
961
|
-
// add offsets
|
962
|
-
const __m256i indices_to_read_from =
|
963
|
-
_mm256_add_epi32(idx1, offsets_0);
|
964
|
-
|
965
|
-
// gather 8 values, similar to 8 operations of tab[idx]
|
966
|
-
__m256 collected = _mm256_i32gather_ps(
|
967
|
-
tab, indices_to_read_from, sizeof(float));
|
968
|
-
tab += pq.ksub * 8;
|
969
|
-
|
970
|
-
// collect partial sums
|
971
|
-
partialSum = _mm256_add_ps(partialSum, collected);
|
972
|
-
}
|
973
|
-
}
|
974
|
-
|
975
|
-
// horizontal sum for partialSum
|
976
|
-
const __m256 h0 = _mm256_hadd_ps(partialSum, partialSum);
|
977
|
-
const __m256 h1 = _mm256_hadd_ps(h0, h0);
|
978
|
-
|
979
|
-
// extract high and low __m128 regs from __m256
|
980
|
-
const __m128 h2 = _mm256_extractf128_ps(h1, 1);
|
981
|
-
const __m128 h3 = _mm256_castps256_ps128(h1);
|
982
|
-
|
983
|
-
// get a final hsum into all 4 regs
|
984
|
-
const __m128 h4 = _mm_add_ss(h2, h3);
|
985
|
-
|
986
|
-
// extract f[0] from __m128
|
987
|
-
const float hsum = _mm_cvtss_f32(h4);
|
988
|
-
result += hsum;
|
989
|
-
}
|
990
|
-
|
991
|
-
//
|
992
|
-
if (m < pq.M) {
|
993
|
-
// process leftovers
|
994
|
-
PQDecoder decoder(code + m, pq.nbits);
|
995
|
-
|
996
|
-
for (; m < pq.M; m++) {
|
997
|
-
result += tab[decoder.decode()];
|
998
|
-
tab += pq.ksub;
|
999
|
-
}
|
1000
|
-
}
|
1001
|
-
|
1002
|
-
return result;
|
1003
|
-
}
|
1004
|
-
|
1005
|
-
#else
|
1006
|
-
/// Returns the distance to a single code.
|
1007
|
-
/// General-purpose version.
|
1008
|
-
template <class SearchResultType>
|
1009
|
-
inline float distance_single_code(const uint8_t* code) const {
|
1010
|
-
PQDecoder decoder(code, pq.nbits);
|
1011
|
-
|
1012
|
-
const float* tab = sim_table;
|
1013
|
-
float result = 0;
|
1014
|
-
|
1015
|
-
for (size_t m = 0; m < pq.M; m++) {
|
1016
|
-
result += tab[decoder.decode()];
|
1017
|
-
tab += pq.ksub;
|
1018
|
-
}
|
1019
|
-
|
1020
|
-
return result;
|
1021
|
-
}
|
1022
|
-
#endif
|
890
|
+
// This is the baseline version of scan_list_with_table().
|
891
|
+
// It demonstrates what this function actually does.
|
892
|
+
//
|
893
|
+
// /// version of the scan where we use precomputed tables.
|
894
|
+
// template <class SearchResultType>
|
895
|
+
// void scan_list_with_table(
|
896
|
+
// size_t ncode,
|
897
|
+
// const uint8_t* codes,
|
898
|
+
// SearchResultType& res) const {
|
899
|
+
//
|
900
|
+
// for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
901
|
+
// if (res.skip_entry(j)) {
|
902
|
+
// continue;
|
903
|
+
// }
|
904
|
+
// float dis = dis0 + distance_single_code<PQDecoder>(
|
905
|
+
// pq, sim_table, codes);
|
906
|
+
// res.add(j, dis);
|
907
|
+
// }
|
908
|
+
// }
|
909
|
+
|
910
|
+
// This is the modified version of scan_list_with_table().
|
911
|
+
// It was observed that doing manual unrolling of the loop that
|
912
|
+
// utilizes distance_single_code() speeds up the computations.
|
1023
913
|
|
1024
914
|
/// version of the scan where we use precomputed tables.
|
1025
915
|
template <class SearchResultType>
|
@@ -1027,12 +917,65 @@ struct IVFPQScannerT : QueryTables {
|
|
1027
917
|
size_t ncode,
|
1028
918
|
const uint8_t* codes,
|
1029
919
|
SearchResultType& res) const {
|
1030
|
-
|
920
|
+
int counter = 0;
|
921
|
+
|
922
|
+
size_t saved_j[4] = {0, 0, 0, 0};
|
923
|
+
for (size_t j = 0; j < ncode; j++) {
|
1031
924
|
if (res.skip_entry(j)) {
|
1032
925
|
continue;
|
1033
926
|
}
|
1034
|
-
|
1035
|
-
|
927
|
+
|
928
|
+
saved_j[0] = (counter == 0) ? j : saved_j[0];
|
929
|
+
saved_j[1] = (counter == 1) ? j : saved_j[1];
|
930
|
+
saved_j[2] = (counter == 2) ? j : saved_j[2];
|
931
|
+
saved_j[3] = (counter == 3) ? j : saved_j[3];
|
932
|
+
|
933
|
+
counter += 1;
|
934
|
+
if (counter == 4) {
|
935
|
+
float distance_0 = 0;
|
936
|
+
float distance_1 = 0;
|
937
|
+
float distance_2 = 0;
|
938
|
+
float distance_3 = 0;
|
939
|
+
distance_four_codes<PQDecoder>(
|
940
|
+
pq,
|
941
|
+
sim_table,
|
942
|
+
codes + saved_j[0] * pq.code_size,
|
943
|
+
codes + saved_j[1] * pq.code_size,
|
944
|
+
codes + saved_j[2] * pq.code_size,
|
945
|
+
codes + saved_j[3] * pq.code_size,
|
946
|
+
distance_0,
|
947
|
+
distance_1,
|
948
|
+
distance_2,
|
949
|
+
distance_3);
|
950
|
+
|
951
|
+
res.add(saved_j[0], dis0 + distance_0);
|
952
|
+
res.add(saved_j[1], dis0 + distance_1);
|
953
|
+
res.add(saved_j[2], dis0 + distance_2);
|
954
|
+
res.add(saved_j[3], dis0 + distance_3);
|
955
|
+
counter = 0;
|
956
|
+
}
|
957
|
+
}
|
958
|
+
|
959
|
+
if (counter >= 1) {
|
960
|
+
float dis =
|
961
|
+
dis0 +
|
962
|
+
distance_single_code<PQDecoder>(
|
963
|
+
pq, sim_table, codes + saved_j[0] * pq.code_size);
|
964
|
+
res.add(saved_j[0], dis);
|
965
|
+
}
|
966
|
+
if (counter >= 2) {
|
967
|
+
float dis =
|
968
|
+
dis0 +
|
969
|
+
distance_single_code<PQDecoder>(
|
970
|
+
pq, sim_table, codes + saved_j[1] * pq.code_size);
|
971
|
+
res.add(saved_j[1], dis);
|
972
|
+
}
|
973
|
+
if (counter >= 3) {
|
974
|
+
float dis =
|
975
|
+
dis0 +
|
976
|
+
distance_single_code<PQDecoder>(
|
977
|
+
pq, sim_table, codes + saved_j[2] * pq.code_size);
|
978
|
+
res.add(saved_j[2], dis);
|
1036
979
|
}
|
1037
980
|
}
|
1038
981
|
|
@@ -1101,6 +1044,46 @@ struct IVFPQScannerT : QueryTables {
|
|
1101
1044
|
* Scanning codes with polysemous filtering
|
1102
1045
|
*****************************************************/
|
1103
1046
|
|
1047
|
+
// This is the baseline version of scan_list_polysemous_hc().
|
1048
|
+
// It demonstrates what this function actually does.
|
1049
|
+
|
1050
|
+
// template <class HammingComputer, class SearchResultType>
|
1051
|
+
// void scan_list_polysemous_hc(
|
1052
|
+
// size_t ncode,
|
1053
|
+
// const uint8_t* codes,
|
1054
|
+
// SearchResultType& res) const {
|
1055
|
+
// int ht = ivfpq.polysemous_ht;
|
1056
|
+
// size_t n_hamming_pass = 0, nup = 0;
|
1057
|
+
//
|
1058
|
+
// int code_size = pq.code_size;
|
1059
|
+
//
|
1060
|
+
// HammingComputer hc(q_code.data(), code_size);
|
1061
|
+
//
|
1062
|
+
// for (size_t j = 0; j < ncode; j++, codes += code_size) {
|
1063
|
+
// if (res.skip_entry(j)) {
|
1064
|
+
// continue;
|
1065
|
+
// }
|
1066
|
+
// const uint8_t* b_code = codes;
|
1067
|
+
// int hd = hc.hamming(b_code);
|
1068
|
+
// if (hd < ht) {
|
1069
|
+
// n_hamming_pass++;
|
1070
|
+
//
|
1071
|
+
// float dis =
|
1072
|
+
// dis0 +
|
1073
|
+
// distance_single_code<PQDecoder>(
|
1074
|
+
// pq, sim_table, codes);
|
1075
|
+
//
|
1076
|
+
// res.add(j, dis);
|
1077
|
+
// }
|
1078
|
+
// }
|
1079
|
+
// #pragma omp critical
|
1080
|
+
// { indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
1081
|
+
// }
|
1082
|
+
|
1083
|
+
// This is the modified version of scan_list_polysemous_hc().
|
1084
|
+
// It was observed that doing manual unrolling of the loop that
|
1085
|
+
// utilizes distance_single_code() speeds up the computations.
|
1086
|
+
|
1104
1087
|
template <class HammingComputer, class SearchResultType>
|
1105
1088
|
void scan_list_polysemous_hc(
|
1106
1089
|
size_t ncode,
|
@@ -1111,23 +1094,103 @@ struct IVFPQScannerT : QueryTables {
|
|
1111
1094
|
|
1112
1095
|
int code_size = pq.code_size;
|
1113
1096
|
|
1097
|
+
size_t saved_j[8];
|
1098
|
+
int counter = 0;
|
1099
|
+
|
1114
1100
|
HammingComputer hc(q_code.data(), code_size);
|
1115
1101
|
|
1116
|
-
for (size_t j = 0; j < ncode; j
|
1102
|
+
for (size_t j = 0; j < (ncode / 4) * 4; j += 4) {
|
1103
|
+
const uint8_t* b_code = codes + j * code_size;
|
1104
|
+
|
1105
|
+
// Unrolling is key. Basically, doing multiple popcount
|
1106
|
+
// operations one after another speeds things up.
|
1107
|
+
|
1108
|
+
// 99999999 is just an arbitrary large number
|
1109
|
+
int hd0 = (res.skip_entry(j + 0))
|
1110
|
+
? 99999999
|
1111
|
+
: hc.hamming(b_code + 0 * code_size);
|
1112
|
+
int hd1 = (res.skip_entry(j + 1))
|
1113
|
+
? 99999999
|
1114
|
+
: hc.hamming(b_code + 1 * code_size);
|
1115
|
+
int hd2 = (res.skip_entry(j + 2))
|
1116
|
+
? 99999999
|
1117
|
+
: hc.hamming(b_code + 2 * code_size);
|
1118
|
+
int hd3 = (res.skip_entry(j + 3))
|
1119
|
+
? 99999999
|
1120
|
+
: hc.hamming(b_code + 3 * code_size);
|
1121
|
+
|
1122
|
+
saved_j[counter] = j + 0;
|
1123
|
+
counter = (hd0 < ht) ? (counter + 1) : counter;
|
1124
|
+
saved_j[counter] = j + 1;
|
1125
|
+
counter = (hd1 < ht) ? (counter + 1) : counter;
|
1126
|
+
saved_j[counter] = j + 2;
|
1127
|
+
counter = (hd2 < ht) ? (counter + 1) : counter;
|
1128
|
+
saved_j[counter] = j + 3;
|
1129
|
+
counter = (hd3 < ht) ? (counter + 1) : counter;
|
1130
|
+
|
1131
|
+
if (counter >= 4) {
|
1132
|
+
// process four codes at the same time
|
1133
|
+
n_hamming_pass += 4;
|
1134
|
+
|
1135
|
+
float distance_0 = dis0;
|
1136
|
+
float distance_1 = dis0;
|
1137
|
+
float distance_2 = dis0;
|
1138
|
+
float distance_3 = dis0;
|
1139
|
+
distance_four_codes<PQDecoder>(
|
1140
|
+
pq,
|
1141
|
+
sim_table,
|
1142
|
+
codes + saved_j[0] * pq.code_size,
|
1143
|
+
codes + saved_j[1] * pq.code_size,
|
1144
|
+
codes + saved_j[2] * pq.code_size,
|
1145
|
+
codes + saved_j[3] * pq.code_size,
|
1146
|
+
distance_0,
|
1147
|
+
distance_1,
|
1148
|
+
distance_2,
|
1149
|
+
distance_3);
|
1150
|
+
|
1151
|
+
res.add(saved_j[0], dis0 + distance_0);
|
1152
|
+
res.add(saved_j[1], dis0 + distance_1);
|
1153
|
+
res.add(saved_j[2], dis0 + distance_2);
|
1154
|
+
res.add(saved_j[3], dis0 + distance_3);
|
1155
|
+
|
1156
|
+
//
|
1157
|
+
counter -= 4;
|
1158
|
+
saved_j[0] = saved_j[4];
|
1159
|
+
saved_j[1] = saved_j[5];
|
1160
|
+
saved_j[2] = saved_j[6];
|
1161
|
+
saved_j[3] = saved_j[7];
|
1162
|
+
}
|
1163
|
+
}
|
1164
|
+
|
1165
|
+
for (size_t kk = 0; kk < counter; kk++) {
|
1166
|
+
n_hamming_pass++;
|
1167
|
+
|
1168
|
+
float dis =
|
1169
|
+
dis0 +
|
1170
|
+
distance_single_code<PQDecoder>(
|
1171
|
+
pq, sim_table, codes + saved_j[kk] * pq.code_size);
|
1172
|
+
|
1173
|
+
res.add(saved_j[kk], dis);
|
1174
|
+
}
|
1175
|
+
|
1176
|
+
// process leftovers
|
1177
|
+
for (size_t j = (ncode / 4) * 4; j < ncode; j++) {
|
1117
1178
|
if (res.skip_entry(j)) {
|
1118
1179
|
continue;
|
1119
1180
|
}
|
1120
|
-
const uint8_t* b_code = codes;
|
1181
|
+
const uint8_t* b_code = codes + j * code_size;
|
1121
1182
|
int hd = hc.hamming(b_code);
|
1122
1183
|
if (hd < ht) {
|
1123
1184
|
n_hamming_pass++;
|
1124
1185
|
|
1125
|
-
float dis =
|
1126
|
-
|
1186
|
+
float dis = dis0 +
|
1187
|
+
distance_single_code<PQDecoder>(
|
1188
|
+
pq, sim_table, codes + j * code_size);
|
1127
1189
|
|
1128
1190
|
res.add(j, dis);
|
1129
1191
|
}
|
1130
1192
|
}
|
1193
|
+
|
1131
1194
|
#pragma omp critical
|
1132
1195
|
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
1133
1196
|
}
|
@@ -1171,7 +1234,7 @@ struct IVFPQScannerT : QueryTables {
|
|
1171
1234
|
* use_sel: store or ignore the IDSelector
|
1172
1235
|
*/
|
1173
1236
|
template <MetricType METRIC_TYPE, class C, class PQDecoder, bool use_sel>
|
1174
|
-
struct IVFPQScanner : IVFPQScannerT<
|
1237
|
+
struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>,
|
1175
1238
|
InvertedListScanner {
|
1176
1239
|
int precompute_mode;
|
1177
1240
|
const IDSelector* sel;
|
@@ -1181,9 +1244,7 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
1181
1244
|
bool store_pairs,
|
1182
1245
|
int precompute_mode,
|
1183
1246
|
const IDSelector* sel)
|
1184
|
-
: IVFPQScannerT<
|
1185
|
-
ivfpq,
|
1186
|
-
nullptr),
|
1247
|
+
: IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>(ivfpq, nullptr),
|
1187
1248
|
precompute_mode(precompute_mode),
|
1188
1249
|
sel(sel) {
|
1189
1250
|
this->store_pairs = store_pairs;
|
@@ -1200,14 +1261,9 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
1200
1261
|
|
1201
1262
|
float distance_to_code(const uint8_t* code) const override {
|
1202
1263
|
assert(precompute_mode == 2);
|
1203
|
-
float dis = this->dis0
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
for (size_t m = 0; m < this->pq.M; m++) {
|
1208
|
-
dis += tab[decoder.decode()];
|
1209
|
-
tab += this->pq.ksub;
|
1210
|
-
}
|
1264
|
+
float dis = this->dis0 +
|
1265
|
+
distance_single_code<PQDecoder>(
|
1266
|
+
this->pq, this->sim_table, code);
|
1211
1267
|
return dis;
|
1212
1268
|
}
|
1213
1269
|
|
@@ -156,7 +156,12 @@ void IndexIVFPQFastScan::train_residual(idx_t n, const float* x_in) {
|
|
156
156
|
|
157
157
|
void IndexIVFPQFastScan::precompute_table() {
|
158
158
|
initialize_IVFPQ_precomputed_table(
|
159
|
-
use_precomputed_table,
|
159
|
+
use_precomputed_table,
|
160
|
+
quantizer,
|
161
|
+
pq,
|
162
|
+
precomputed_table,
|
163
|
+
by_residual,
|
164
|
+
verbose);
|
160
165
|
}
|
161
166
|
|
162
167
|
/*********************************************************
|
@@ -50,7 +50,6 @@ int sgemm_(
|
|
50
50
|
|
51
51
|
namespace faiss {
|
52
52
|
|
53
|
-
using idx_t = Index::idx_t;
|
54
53
|
using storage_idx_t = NNDescent::storage_idx_t;
|
55
54
|
|
56
55
|
/**************************************************************
|
@@ -89,7 +88,7 @@ struct NegativeDistanceComputer : DistanceComputer {
|
|
89
88
|
};
|
90
89
|
|
91
90
|
DistanceComputer* storage_distance_computer(const Index* storage) {
|
92
|
-
if (storage->metric_type
|
91
|
+
if (is_similarity_metric(storage->metric_type)) {
|
93
92
|
return new NegativeDistanceComputer(storage->get_distance_computer());
|
94
93
|
} else {
|
95
94
|
return storage->get_distance_computer();
|
@@ -23,7 +23,6 @@
|
|
23
23
|
|
24
24
|
namespace faiss {
|
25
25
|
|
26
|
-
using idx_t = Index::idx_t;
|
27
26
|
using namespace nsg;
|
28
27
|
|
29
28
|
/**************************************************************
|
@@ -113,7 +112,7 @@ void IndexNSG::search(
|
|
113
112
|
InterruptCallback::check();
|
114
113
|
}
|
115
114
|
|
116
|
-
if (metric_type
|
115
|
+
if (is_similarity_metric(metric_type)) {
|
117
116
|
// we need to revert the negated distances
|
118
117
|
for (size_t i = 0; i < k * n; i++) {
|
119
118
|
distances[i] = -distances[i];
|
@@ -19,6 +19,8 @@
|
|
19
19
|
#include <faiss/impl/FaissAssert.h>
|
20
20
|
#include <faiss/utils/hamming.h>
|
21
21
|
|
22
|
+
#include <faiss/impl/code_distance/code_distance.h>
|
23
|
+
|
22
24
|
namespace faiss {
|
23
25
|
|
24
26
|
/*********************************************************
|
@@ -74,22 +76,18 @@ template <class PQDecoder>
|
|
74
76
|
struct PQDistanceComputer : FlatCodesDistanceComputer {
|
75
77
|
size_t d;
|
76
78
|
MetricType metric;
|
77
|
-
|
79
|
+
idx_t nb;
|
78
80
|
const ProductQuantizer& pq;
|
79
81
|
const float* sdc;
|
80
82
|
std::vector<float> precomputed_table;
|
81
83
|
size_t ndis;
|
82
84
|
|
83
85
|
float distance_to_code(const uint8_t* code) final {
|
84
|
-
const float* dt = precomputed_table.data();
|
85
|
-
PQDecoder decoder(code, pq.nbits);
|
86
|
-
float accu = 0;
|
87
|
-
for (int j = 0; j < pq.M; j++) {
|
88
|
-
accu += dt[decoder.decode()];
|
89
|
-
dt += 1 << decoder.nbits;
|
90
|
-
}
|
91
86
|
ndis++;
|
92
|
-
|
87
|
+
|
88
|
+
float dis = distance_single_code<PQDecoder>(
|
89
|
+
pq, precomputed_table.data(), code);
|
90
|
+
return dis;
|
93
91
|
}
|
94
92
|
|
95
93
|
float symmetric_dis(idx_t i, idx_t j) override {
|
@@ -123,14 +123,13 @@ void IndexReplicasTemplate<IndexT>::search(
|
|
123
123
|
size_t componentsPerVec = sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;
|
124
124
|
|
125
125
|
// Partition the query by the number of indices we have
|
126
|
-
faiss::
|
127
|
-
(faiss::
|
128
|
-
(faiss::Index::idx_t)this->count();
|
126
|
+
faiss::idx_t queriesPerIndex =
|
127
|
+
(faiss::idx_t)(n + this->count() - 1) / (faiss::idx_t)this->count();
|
129
128
|
FAISS_ASSERT(n / queriesPerIndex <= this->count());
|
130
129
|
|
131
130
|
auto fn = [queriesPerIndex, componentsPerVec, n, x, k, distances, labels](
|
132
131
|
int i, const IndexT* index) {
|
133
|
-
faiss::
|
132
|
+
faiss::idx_t base = (faiss::idx_t)i * queriesPerIndex;
|
134
133
|
|
135
134
|
if (base < n) {
|
136
135
|
auto numForIndex = std::min(queriesPerIndex, n - base);
|
@@ -20,7 +20,6 @@ namespace faiss {
|
|
20
20
|
template <typename IndexT>
|
21
21
|
class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
|
22
22
|
public:
|
23
|
-
using idx_t = typename IndexT::idx_t;
|
24
23
|
using component_t = typename IndexT::component_t;
|
25
24
|
using distance_t = typename IndexT::distance_t;
|
26
25
|
|
@@ -1,3 +1,10 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
3
|
+
*
|
4
|
+
* This source code is licensed under the MIT license found in the
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
6
|
+
*/
|
7
|
+
|
1
8
|
#include <faiss/IndexRowwiseMinMax.h>
|
2
9
|
|
3
10
|
#include <cstdint>
|
@@ -11,7 +18,7 @@ namespace faiss {
|
|
11
18
|
|
12
19
|
namespace {
|
13
20
|
|
14
|
-
using idx_t = faiss::
|
21
|
+
using idx_t = faiss::idx_t;
|
15
22
|
|
16
23
|
struct StorageMinMaxFP16 {
|
17
24
|
uint16_t scaler;
|