faiss 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +36 -33
- data/vendor/faiss/faiss/AutoTune.h +6 -3
- data/vendor/faiss/faiss/Clustering.cpp +16 -12
- data/vendor/faiss/faiss/Index.cpp +3 -4
- data/vendor/faiss/faiss/Index.h +3 -3
- data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
- data/vendor/faiss/faiss/IndexBinary.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
- data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
- data/vendor/faiss/faiss/IndexFlat.h +0 -51
- data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
- data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
- data/vendor/faiss/faiss/IndexIVF.h +22 -15
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
- data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
- data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
- data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
- data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
- data/vendor/faiss/faiss/IndexRefine.h +73 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
- data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
- data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
- data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
- data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
- data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
- data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
- data/vendor/faiss/faiss/impl/io.cpp +33 -2
- data/vendor/faiss/faiss/impl/io.h +7 -2
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
- data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
- data/vendor/faiss/faiss/index_factory.cpp +112 -7
- data/vendor/faiss/faiss/index_io.h +1 -48
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
- data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
- data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
- data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
- data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
- data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
- data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
- data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
- data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
- data/vendor/faiss/faiss/utils/Heap.h +61 -50
- data/vendor/faiss/faiss/utils/distances.cpp +164 -319
- data/vendor/faiss/faiss/utils/distances.h +28 -20
- data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
- data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
- data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
- data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
- data/vendor/faiss/faiss/utils/hamming.h +2 -7
- data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
- data/vendor/faiss/faiss/utils/partitioning.h +69 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
- data/vendor/faiss/faiss/utils/simdlib.h +31 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
- metadata +43 -141
- data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
- data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
- data/vendor/faiss/c_api/AutoTune_c.h +0 -66
- data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
- data/vendor/faiss/c_api/Clustering_c.h +0 -123
- data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
- data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
- data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
- data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
- data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
- data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
- data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
- data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
- data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
- data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
- data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
- data/vendor/faiss/c_api/IndexShards_c.h +0 -39
- data/vendor/faiss/c_api/Index_c.cpp +0 -105
- data/vendor/faiss/c_api/Index_c.h +0 -183
- data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
- data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
- data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
- data/vendor/faiss/c_api/clone_index_c.h +0 -32
- data/vendor/faiss/c_api/error_c.h +0 -42
- data/vendor/faiss/c_api/error_impl.cpp +0 -27
- data/vendor/faiss/c_api/error_impl.h +0 -16
- data/vendor/faiss/c_api/faiss_c.h +0 -58
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
- data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
- data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
- data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
- data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
- data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
- data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
- data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
- data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
- data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
- data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
- data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
- data/vendor/faiss/c_api/index_factory_c.h +0 -30
- data/vendor/faiss/c_api/index_io_c.cpp +0 -42
- data/vendor/faiss/c_api/index_io_c.h +0 -50
- data/vendor/faiss/c_api/macros_impl.h +0 -110
- data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
- data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
- data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
- data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
- data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
- data/vendor/faiss/misc/test_blas.cpp +0 -87
- data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
- data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
- data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
- data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
- data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
- data/vendor/faiss/tests/test_merge.cpp +0 -260
- data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
- data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
- data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
- data/vendor/faiss/tests/test_params_override.cpp +0 -236
- data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
- data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
- data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
- data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
- data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
- data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
- data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
- data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
- data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
data/vendor/faiss/faiss/IndexIVFPQ.cpp

@@ -371,7 +371,7 @@ void IndexIVFPQ::reconstruct_from_offset (int64_t list_no, int64_t offset,
 
 
 /// 2G by default, accommodates tables up to PQ32 w/ 65536 centroids
-size_t IndexIVFPQ::precomputed_table_max_bytes = ((size_t)1) << 31;
+size_t precomputed_table_max_bytes = ((size_t)1) << 31;
 
 /** Precomputed tables for residuals
  *
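Note: the 2 GiB cap is now a namespace-scope variable instead of a static member of IndexIVFPQ (the header change further below declares it FAISS_API extern). A minimal sketch of adjusting it from client code, assuming the vendored headers are on the include path:

    #include <faiss/IndexIVFPQ.h>

    int main() {
        // raise the cap from the 2 GiB default to 4 GiB, so the
        // use_precomputed_table == 0 heuristic accepts larger tables
        faiss::precomputed_table_max_bytes = ((size_t)1) << 32;
        return 0;
    }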
@@ -403,10 +403,22 @@ size_t IndexIVFPQ::precomputed_table_max_bytes = ((size_t)1) << 31;
  * is faster when the length of the lists is > ksub * M.
  */
 
-void IndexIVFPQ::precompute_table ()
+void initialize_IVFPQ_precomputed_table (
+        int &use_precomputed_table,
+        const Index *quantizer,
+        const ProductQuantizer &pq,
+        AlignedTable<float> & precomputed_table,
+        bool verbose
+)
 {
-    if (use_precomputed_table == -1)
+    size_t nlist = quantizer->ntotal;
+    size_t d = quantizer->d;
+    FAISS_THROW_IF_NOT(d == pq.d);
+
+    if (use_precomputed_table == -1) {
+        precomputed_table.resize (0);
         return;
+    }
 
     if (use_precomputed_table == 0) { // then choose the type of table
         if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
@@ -414,6 +426,7 @@ void IndexIVFPQ::precompute_table ()
             printf("IndexIVFPQ::precompute_table: precomputed "
                    "tables not needed for inner product quantizers\n");
         }
+        precomputed_table.resize (0);
         return;
     }
     const MultiIndexQuantizer *miq =
@@ -492,6 +505,16 @@ void IndexIVFPQ::precompute_table ()
 
 }
 
+void IndexIVFPQ::precompute_table ()
+{
+    initialize_IVFPQ_precomputed_table (
+        use_precomputed_table, quantizer, pq, precomputed_table,
+        verbose
+    );
+}
+
+
+
 namespace {
 
 using idx_t = Index::idx_t;
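IndexIVFPQ::precompute_table thus becomes a thin wrapper over the free function, so existing callers are unaffected. A hedged usage sketch (the coarse quantizer, dimension d, nlist, and training set xt/nt are placeholders):

    faiss::IndexFlatL2 coarse(d);
    faiss::IndexIVFPQ index(&coarse, d, nlist, /*M=*/16, /*nbits=*/8);
    index.train(nt, xt);
    index.use_precomputed_table = 1; // force the general table type
    index.precompute_table();        // delegates to initialize_IVFPQ_precomputed_table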
@@ -676,11 +699,12 @@ struct QueryTables {
         } else if (use_precomputed_table == 1) {
             dis0 = coarse_dis;
 
-            fvec_madd (
-                pq.M * pq.ksub,
-                &ivfpq.precomputed_table [key * pq.ksub * pq.M],
-                -2.0, sim_table_2,
-                sim_table);
+            fvec_madd (
+                pq.M * pq.ksub,
+                ivfpq.precomputed_table.data() + key * pq.ksub * pq.M,
+                -2.0, sim_table_2,
+                sim_table
+            );
 
             if (polysemous_ht != 0) {
                 ivfpq.quantizer->compute_residual (qi, residual_vec, key);
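For reference, fvec_madd computes c[i] = a[i] + bf * b[i]. Here it assembles the per-list look-up table from the decomposition that the precomputed tables are built on (see the "Precomputed tables for residuals" comment earlier in this file):

    dis = || x - y_C - y_R ||^2
        = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C | y_R) - 2 * (x | y_R)
          ---------------   ---------------------------     -------------
               term 1                 term 2                    term 3

The precomputed table stores term 2 per (centroid, code) pair, sim_table_2 holds the query-dependent inner products (x | y_R), and dis0 carries term 1; the -2.0 multiplier above supplies term 3.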
@@ -706,8 +730,8 @@ struct QueryTables {
             k >>= cpq.nbits;
 
             // get corresponding table
-            const float *pc =
-                &ivfpq.precomputed_table [(ki * pq.M + cm * Mf) * pq.ksub];
+            const float *pc = ivfpq.precomputed_table.data() +
+                (ki * pq.M + cm * Mf) * pq.ksub;
 
             if (polysemous_ht == 0) {
 
@@ -741,7 +765,8 @@ struct QueryTables {
         if (use_precomputed_table == 1) {
             dis0 = coarse_dis;
 
-            const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
+            const float * s = ivfpq.precomputed_table.data() +
+                key * pq.ksub * pq.M;
             for (int m = 0; m < pq.M; m++) {
                 sim_table_ptrs [m] = s;
                 s += pq.ksub;
@@ -761,8 +786,8 @@ struct QueryTables {
             int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
             k >>= cpq.nbits;
 
-            const float *pc =
-                &ivfpq.precomputed_table [(ki * pq.M + cm * Mf) * pq.ksub];
+            const float *pc = ivfpq.precomputed_table.data() +
+                (ki * pq.M + cm * Mf) * pq.ksub;
 
             for (int m = m0; m < m0 + Mf; m++) {
                 sim_table_ptrs [m] = pc;
@@ -803,9 +828,8 @@ struct KnnSearchResults {
 
     inline void add (idx_t j, float dis) {
         if (C::cmp (heap_sim[0], dis)) {
-            heap_pop<C> (k, heap_sim, heap_ids);
             idx_t id = ids ? ids[j] : lo_build (key, j);
-            heap_push<C> (k, heap_sim, heap_ids, dis, id);
+            heap_replace_top<C> (k, heap_sim, heap_ids, dis, id);
             nup++;
         }
     }
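heap_replace_top, which this release adds to faiss/utils/Heap.h, fuses the heap_pop/heap_push pair into a single sift-down: the root is overwritten with the new element and the heap property is restored in one pass instead of two. An illustrative stand-alone sketch for a plain max-heap (faiss' version additionally carries the ids array and the CMax/CMin comparator):

    #include <algorithm>
    #include <cstddef>

    // replace the root of a binary max-heap a[0..k-1] with v and
    // restore the heap property with a single downward pass
    void replace_top(size_t k, float* a, float v) {
        a[0] = v;
        size_t i = 0;
        while (true) {
            size_t l = 2 * i + 1, r = l + 1, m = i;
            if (l < k && a[l] > a[m]) m = l;
            if (r < k && a[r] > a[m]) m = r;
            if (m == i) break;
            std::swap(a[i], a[m]);
            i = m;
        }
    }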
data/vendor/faiss/faiss/IndexIVFPQ.h

@@ -16,7 +16,7 @@
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexPQ.h>
 #include <faiss/impl/platform_macros.h>
-#include <vector>
+#include <faiss/utils/AlignedTable.h>
 
 namespace faiss {
 
@@ -28,10 +28,14 @@ struct IVFPQSearchParameters: IVFSearchParameters {
 };
 
 
+
+FAISS_API extern size_t precomputed_table_max_bytes;
+
+
 /** Inverted file with Product Quantizer encoding. Each residual
  * vector is encoded as a product quantizer code.
  */
-struct FAISS_API IndexIVFPQ: IndexIVF {
+struct IndexIVFPQ: IndexIVF {
     bool by_residual;        ///< Encode residual or plain vector?
 
     ProductQuantizer pq;     ///< produces the codes
@@ -45,18 +49,12 @@ struct FAISS_API IndexIVFPQ: IndexIVF {
 
     /** Precompute table that speed up query preprocessing at some
      * memory cost (used only for by_residual with L2 metric)
-     * =-1: force disable
-     * =0: decide heuristically (default: use tables only if they are
-     *     < precomputed_tables_max_bytes)
-     * =1: tables that work for all quantizers (size 256 * nlist * M)
-     * =2: specific version for MultiIndexQuantizer (much more compact)
      */
     int use_precomputed_table;
-    static size_t precomputed_table_max_bytes;
 
     /// if use_precompute_table
     /// size nlist * pq.M * pq.ksub
-    std::vector <float> precomputed_table;
+    AlignedTable<float> precomputed_table;
 
     IndexIVFPQ (
         Index * quantizer, size_t d, size_t nlist,
@@ -133,6 +131,24 @@ struct FAISS_API IndexIVFPQ: IndexIVF {
 
 };
 
+/** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
+ *
+ * @param use_precomputed_table (I/O)
+ *        =-1: force disable
+ *        =0: decide heuristically (default: use tables only if they are
+ *            < precomputed_tables_max_bytes), set use_precomputed_table on output
+ *        =1: tables that work for all quantizers (size 256 * nlist * M)
+ *        =2: specific version for MultiIndexQuantizer (much more compact)
+ * @param precomputed_table precomputed table to intialize
+ */
+
+void initialize_IVFPQ_precomputed_table(
+        int &use_precomputed_table,
+        const Index *quantizer,
+        const ProductQuantizer &pq,
+        AlignedTable<float> & precomputed_table,
+        bool verbose
+);
 
 /// statistics are robust to internal threading, but not if
 /// IndexIVFPQ::search_preassigned is called by multiple threads
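A hedged sketch of calling the new free function directly; quantizer and pq stand in for the members of an index being initialized:

    int use_tab = 0;                  // 0 = let the heuristic decide
    faiss::AlignedTable<float> tab;
    faiss::initialize_IVFPQ_precomputed_table(
        use_tab, quantizer, pq, tab, /*verbose=*/true);
    // on return, use_tab records the table type actually selected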
data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp (new file)

@@ -0,0 +1,1116 @@
+/**
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <faiss/IndexIVFPQFastScan.h>
+
+#include <cassert>
+#include <cstdio>
+#include <inttypes.h>
+
+#include <omp.h>
+
+#include <memory>
+
+#include <faiss/impl/FaissAssert.h>
+#include <faiss/utils/utils.h>
+#include <faiss/utils/distances.h>
+#include <faiss/utils/simdlib.h>
+#include <faiss/impl/AuxIndexStructures.h>
+
+#include <faiss/invlists/BlockInvertedLists.h>
+
+#include <faiss/impl/simd_result_handlers.h>
+#include <faiss/utils/quantize_lut.h>
+#include <faiss/impl/pq4_fast_scan.h>
+
+namespace faiss {
+
+using namespace simd_result_handlers;
+
+
+inline size_t roundup(size_t a, size_t b) {
+    return (a + b - 1) / b * b;
+}
+
+
+IndexIVFPQFastScan::IndexIVFPQFastScan (
+        Index * quantizer, size_t d, size_t nlist,
+        size_t M, size_t nbits_per_idx,
+        MetricType metric, int bbs):
+    IndexIVF (quantizer, d, nlist, 0, metric),
+    pq (d, M, nbits_per_idx),
+    bbs (bbs)
+{
+    FAISS_THROW_IF_NOT(nbits_per_idx == 4);
+    M2 = roundup(pq.M, 2);
+    by_residual = false; // set to false by default because it's much faster
+    is_trained = false;
+    code_size = pq.code_size;
+
+    replace_invlists(
+        new BlockInvertedLists(nlist, bbs, bbs * M2 / 2),
+        true
+    );
+}
+
+IndexIVFPQFastScan::IndexIVFPQFastScan ()
+{
+    by_residual = false;
+    bbs = 0;
+    M2 = 0;
+}
+
+
+IndexIVFPQFastScan::IndexIVFPQFastScan(const IndexIVFPQ & orig, int bbs):
+    IndexIVF(
+        orig.quantizer, orig.d, orig.nlist,
+        orig.pq.code_size, orig.metric_type),
+    pq(orig.pq),
+    bbs(bbs)
+{
+    FAISS_THROW_IF_NOT(orig.pq.nbits == 4);
+
+    by_residual = orig.by_residual;
+    ntotal = orig.ntotal;
+    is_trained = orig.is_trained;
+    nprobe = orig.nprobe;
+    size_t M = pq.M;
+
+    M2 = roundup(M, 2);
+
+    replace_invlists(
+        new BlockInvertedLists(orig.nlist, bbs, bbs * M2 / 2),
+        true
+    );
+
+    precomputed_table.resize(orig.precomputed_table.size());
+
+    if (precomputed_table.nbytes() > 0) {
+        memcpy(precomputed_table.get(), orig.precomputed_table.data(),
+               precomputed_table.nbytes()
+        );
+    }
+
+    for(size_t i = 0; i < nlist; i++) {
+        size_t nb = orig.invlists->list_size(i);
+        size_t nb2 = roundup(nb, bbs);
+        AlignedTable<uint8_t> tmp(nb2 * M2 / 2);
+        pq4_pack_codes(
+            InvertedLists::ScopedCodes(orig.invlists, i).get(),
+            nb, M, nb2, bbs, M2,
+            tmp.get()
+        );
+        invlists->add_entries(
+            i, nb,
+            InvertedLists::ScopedIds(orig.invlists, i).get(),
+            tmp.get()
+        );
+    }
+
+    orig_invlists = orig.invlists;
+}
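A hedged construction sketch based on the constructors above (d, nlist, M, and the trained IndexIVFPQ are placeholders; bbs is the block size that the batched search implementations further below require to be 32):

    faiss::IndexFlatL2 coarse(d);
    faiss::IndexIVFPQFastScan index(&coarse, d, nlist, M, 4); // nbits must be 4

    // or re-pack an already trained IndexIVFPQ into the block layout:
    faiss::IndexIVFPQFastScan fast(trained_ivfpq, /*bbs=*/32);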
+
+
+
+/*********************************************************
+ * Training
+ *********************************************************/
+
+void IndexIVFPQFastScan::train_residual (idx_t n, const float *x_in)
+{
+
+    const float * x = fvecs_maybe_subsample (
+        d, (size_t*)&n, pq.cp.max_points_per_centroid * pq.ksub,
+        x_in, verbose, pq.cp.seed);
+
+    std::unique_ptr<float []> del_x;
+    if (x != x_in) {
+        del_x.reset((float*)x);
+    }
+
+    const float *trainset;
+    AlignedTable<float> residuals;
+
+    if (by_residual) {
+        if(verbose) printf("computing residuals\n");
+        std::vector<idx_t> assign(n);
+        quantizer->assign (n, x, assign.data());
+        residuals.resize(n * d);
+        for (idx_t i = 0; i < n; i++) {
+            quantizer->compute_residual (
+                x + i * d,
+                residuals.data() + i * d,
+                assign[i]
+            );
+        }
+        trainset = residuals.data();
+    } else {
+        trainset = x;
+    }
+
+    if (verbose) {
+        printf ("training %zdx%zd product quantizer on %zd vectors in %dD\n",
+                pq.M, pq.ksub, long(n), d);
+    }
+    pq.verbose = verbose;
+    pq.train (n, trainset);
+
+    if (by_residual && metric_type == METRIC_L2) {
+        precompute_table();
+    }
+
+}
+
+void IndexIVFPQFastScan::precompute_table ()
+{
+    initialize_IVFPQ_precomputed_table(
+        use_precomputed_table,
+        quantizer, pq, precomputed_table, verbose
+    );
+}
+
+
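Training follows the usual IndexIVF flow: IndexIVF::train trains the coarse quantizer and then calls train_residual above, which optionally subsamples the training set, trains the PQ, and precomputes tables when applicable. A minimal sketch:

    index.verbose = true;
    index.train(nt, xt);   // ends up in train_residual above
    index.add(nb, xb);     // encoded and packed by add_with_ids below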
+/*********************************************************
+ * Code management functions
+ *********************************************************/
+
+
+
+void IndexIVFPQFastScan::encode_vectors(
+        idx_t n, const float* x, const idx_t *list_nos,
+        uint8_t * codes, bool include_listnos) const
+{
+
+    if (by_residual) {
+        AlignedTable<float> residuals (n * d);
+        for (size_t i = 0; i < n; i++) {
+            if (list_nos[i] < 0) {
+                memset (residuals.data() + i * d, 0, sizeof(residuals[0]) * d);
+            } else {
+                quantizer->compute_residual (
+                    x + i * d, residuals.data() + i * d, list_nos[i]);
+            }
+        }
+        pq.compute_codes (residuals.data(), codes, n);
+    } else {
+        pq.compute_codes (x, codes, n);
+    }
+
+    if (include_listnos) {
+        size_t coarse_size = coarse_code_size();
+        for (idx_t i = n - 1; i >= 0; i--) {
+            uint8_t * code = codes + i * (coarse_size + code_size);
+            memmove (code + coarse_size,
+                     codes + i * code_size, code_size);
+            encode_listno (list_nos[i], code);
+        }
+    }
+}
+
+
+
+void IndexIVFPQFastScan::add_with_ids (
+        idx_t n, const float * x, const idx_t *xids) {
+
+    // copied from IndexIVF::add_with_ids --->
+
+    // do some blocking to avoid excessive allocs
+    idx_t bs = 65536;
+    if (n > bs) {
+        for (idx_t i0 = 0; i0 < n; i0 += bs) {
+            idx_t i1 = std::min (n, i0 + bs);
+            if (verbose) {
+                printf("   IndexIVFPQFastScan::add_with_ids %zd: %zd",
+                       size_t(i0), size_t(i1));
+            }
+            add_with_ids (i1 - i0, x + i0 * d,
+                          xids ? xids + i0 : nullptr);
+        }
+        return;
+    }
+    InterruptCallback::check();
+
+    AlignedTable<uint8_t> codes(n * code_size);
+
+    FAISS_THROW_IF_NOT (is_trained);
+    direct_map.check_can_add (xids);
+
+    std::unique_ptr<idx_t []> idx(new idx_t[n]);
+    quantizer->assign (n, x, idx.get());
+    size_t nadd = 0, nminus1 = 0;
+
+    for (size_t i = 0; i < n; i++) {
+        if (idx[i] < 0) nminus1++;
+    }
+
+    AlignedTable<uint8_t> flat_codes(n * code_size);
+    encode_vectors (n, x, idx.get(), flat_codes.get());
+
+    DirectMapAdd dm_adder(direct_map, n, xids);
+
+    // <---
+
+    BlockInvertedLists *bil = dynamic_cast<BlockInvertedLists*>(invlists);
+    FAISS_THROW_IF_NOT_MSG (bil, "only block inverted lists supported");
+
+    // prepare batches
+    std::vector<idx_t> order(n);
+    for(idx_t i = 0; i < n ; i++) { order[i] = i; }
+
+    // TODO should not need stable
+    std::stable_sort(order.begin(), order.end(),
+        [&idx](idx_t a, idx_t b) {
+            return idx[a] < idx[b];
+        }
+    );
+
+    // TODO parallelize
+    idx_t i0 = 0;
+    while (i0 < n) {
+        idx_t list_no = idx[order[i0]];
+        idx_t i1 = i0 + 1;
+        while (i1 < n && idx[order[i1]] == list_no) {
+            i1 ++;
+        }
+
+        if (list_no == -1) {
+            i0 = i1;
+            continue;
+        }
+
+        // make linear array
+        AlignedTable<uint8_t> list_codes((i1 - i0) * code_size);
+        size_t list_size = bil->list_size(list_no);
+
+        bil->resize(list_no, list_size + i1 - i0);
+
+        for(idx_t i = i0; i < i1; i++) {
+            size_t ofs = list_size + i - i0;
+            idx_t id = xids ? xids[order[i]] : ntotal + order[i];
+            dm_adder.add (order[i], list_no, ofs);
+            bil->ids[list_no][ofs] = id;
+            memcpy(
+                list_codes.data() + (i - i0) * code_size,
+                flat_codes.data() + order[i] * code_size,
+                code_size
+            );
+            nadd++;
+        }
+        pq4_pack_codes_range(
+            list_codes.data(), pq.M,
+            list_size, list_size + i1 - i0,
+            bbs, M2, bil->codes[list_no].data()
+        );
+
+        i0 = i1;
+    }
+
+    ntotal += n;
+
+}
+
+
+
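The packed layout groups bbs vectors per block, two 4-bit codes per byte over M2 sub-quantizers (M rounded up to an even count). The storage arithmetic used above, spelled out as an illustration:

    size_t block_bytes = bbs * M2 / 2;     // one block: bbs codes of M2 nibbles
    size_t nb2 = roundup(list_size, bbs);  // lists are padded to whole blocks
    size_t list_bytes = nb2 * M2 / 2;      // matches the AlignedTable sizes above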
+/*********************************************************
+ * search
+ *********************************************************/
+
+
+namespace {
+
+// from impl/ProductQuantizer.cpp
+template <class C, typename dis_t>
+void pq_estimators_from_tables_generic(
+        const ProductQuantizer& pq, size_t nbits,
+        const uint8_t *codes, size_t ncodes,
+        const dis_t *dis_table, const int64_t * ids,
+        float dis0,
+        size_t k, typename C::T *heap_dis, int64_t *heap_ids)
+{
+    using accu_t = typename C::T;
+    const size_t M = pq.M;
+    const size_t ksub = pq.ksub;
+    for (size_t j = 0; j < ncodes; ++j) {
+        PQDecoderGeneric decoder(
+            codes + j * pq.code_size, nbits
+        );
+        accu_t dis = dis0;
+        const dis_t * dt = dis_table;
+        for (size_t m = 0; m < M; m++) {
+            uint64_t c = decoder.decode();
+            dis += dt[c];
+            dt += ksub;
+        }
+
+        if (C::cmp(heap_dis[0], dis)) {
+            heap_pop<C>(k, heap_dis, heap_ids);
+            heap_push<C>(k, heap_dis, heap_ids, dis, ids[j]);
+        }
+    }
+}
+
+using idx_t = Index::idx_t;
+using namespace quantize_lut;
+
+void fvec_madd_avx (
+        size_t n, const float *a,
+        float bf, const float *b, float *c)
+{
+    assert(is_aligned_pointer(a));
+    assert(is_aligned_pointer(b));
+    assert(is_aligned_pointer(c));
+    assert(n % 8 == 0);
+    simd8float32 bf8(bf);
+    n /= 8;
+    for(size_t i = 0; i < n; i++) {
+        simd8float32 ai(a);
+        simd8float32 bi(b);
+
+        simd8float32 ci = fmadd(bf8, bi, ai);
+        ci.store(c);
+        c += 8;
+        a += 8;
+        b += 8;
+    }
+
+}
+
+} // anonymous namespace
+
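fvec_madd_avx processes 8 floats per iteration through simd8float32 (from utils/simdlib.h, also new in this release). A scalar reference of what it computes, for illustration only:

    // c[i] = a[i] + bf * b[i], for i in [0, n)
    void fvec_madd_ref(size_t n, const float* a, float bf,
                       const float* b, float* c) {
        for (size_t i = 0; i < n; i++) {
            c[i] = a[i] + bf * b[i];
        }
    }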
+/*********************************************************
+ * Look-Up Table functions
+ *********************************************************/
+
+
+void IndexIVFPQFastScan::compute_LUT(
+        size_t n, const float *x,
+        const idx_t *coarse_ids, const float *coarse_dis,
+        AlignedTable<float> & dis_tables,
+        AlignedTable<float> & biases
+) const
+{
+    const IndexIVFPQFastScan & ivfpq = *this;
+    size_t dim12 = pq.ksub * pq.M;
+    size_t d = pq.d;
+    size_t nprobe = ivfpq.nprobe;
+
+    if (ivfpq.by_residual) {
+
+        if (ivfpq.metric_type == METRIC_L2) {
+
+            dis_tables.resize(n * nprobe * dim12);
+
+            if (ivfpq.use_precomputed_table == 1) {
+                biases.resize(n * nprobe);
+                memcpy(biases.get(), coarse_dis, sizeof(float) * n * nprobe);
+
+                AlignedTable<float> ip_table(n * dim12);
+                pq.compute_inner_prod_tables (n, x, ip_table.get());
+
+#pragma omp parallel for if (n * nprobe > 8000)
+                for(idx_t ij = 0; ij < n * nprobe; ij++) {
+                    idx_t i = ij / nprobe;
+                    float *tab = dis_tables.get() + ij * dim12;
+                    idx_t cij = coarse_ids[ij];
+
+                    if (cij >= 0) {
+                        fvec_madd_avx (
+                            dim12,
+                            precomputed_table.get() + cij * dim12,
+                            -2, ip_table.get() + i * dim12,
+                            tab
+                        );
+                    } else {
+                        // fill with NaNs so that they are ignored during
+                        // LUT quantization
+                        memset (tab, -1, sizeof(float) * dim12);
+                    }
+                }
+
+            } else {
+
+                std::unique_ptr<float[]> xrel(new float[n * nprobe * d]);
+                biases.resize(n * nprobe);
+                memset(biases.get(), 0, sizeof(float) * n * nprobe);
+
+#pragma omp parallel for if (n * nprobe > 8000)
+                for(idx_t ij = 0; ij < n * nprobe; ij++) {
+                    idx_t i = ij / nprobe;
+                    float *xij = &xrel[ij * d];
+                    idx_t cij = coarse_ids[ij];
+
+                    if (cij >= 0) {
+                        ivfpq.quantizer->compute_residual(
+                            x + i * d, xij, cij);
+                    } else {
+                        // will fill with NaNs
+                        memset(xij, -1, sizeof(float) * d);
+                    }
+                }
+
+                pq.compute_distance_tables (
+                    n * nprobe, xrel.get(), dis_tables.get());
+
+            }
+
+        } else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
+            dis_tables.resize(n * dim12);
+            pq.compute_inner_prod_tables (n, x, dis_tables.get());
+            // compute_inner_prod_tables(pq, n, x, dis_tables.get());
+
+            biases.resize(n * nprobe);
+            memcpy(biases.get(), coarse_dis, sizeof(float) * n * nprobe);
+        } else {
+            FAISS_THROW_FMT("metric %d not supported", ivfpq.metric_type);
+        }
+
+    } else {
+        dis_tables.resize(n * dim12);
+        if (ivfpq.metric_type == METRIC_L2) {
+            pq.compute_distance_tables (n, x, dis_tables.get());
+        } else if (ivfpq.metric_type == METRIC_INNER_PRODUCT) {
+            pq.compute_inner_prod_tables (n, x, dis_tables.get());
+        } else {
+            FAISS_THROW_FMT("metric %d not supported", ivfpq.metric_type);
+        }
+    }
+
+}
+
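Note on the memset(tab, -1, ...) lines above: setting every byte to 0xFF gives each float a NaN bit pattern (sign bit set, exponent all ones, nonzero mantissa), so table entries for invalid (negative) coarse ids are ignored when the tables are quantized below.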
+void IndexIVFPQFastScan::compute_LUT_uint8(
+        size_t n, const float *x,
+        const idx_t *coarse_ids, const float *coarse_dis,
+        AlignedTable<uint8_t> & dis_tables,
+        AlignedTable<uint16_t> & biases,
+        float * normalizers
+) const {
+    const IndexIVFPQFastScan & ivfpq = *this;
+    AlignedTable<float> dis_tables_float;
+    AlignedTable<float> biases_float;
+
+    uint64_t t0 = get_cy();
+    compute_LUT(
+        n, x,
+        coarse_ids, coarse_dis,
+        dis_tables_float, biases_float
+    );
+    IVFFastScan_stats.t_compute_distance_tables += get_cy() - t0;
+
+    bool lut_is_3d = ivfpq.by_residual && ivfpq.metric_type == METRIC_L2;
+    size_t dim123 = pq.ksub * pq.M;
+    size_t dim123_2 = pq.ksub * M2;
+    if (lut_is_3d) {
+        dim123 *= nprobe;
+        dim123_2 *= nprobe;
+    }
+    dis_tables.resize(n * dim123_2);
+    if (biases_float.get()) {
+        biases.resize(n * nprobe);
+    }
+    uint64_t t1 = get_cy();
+
+#pragma omp parallel for if (n > 100)
+    for(int64_t i = 0; i < n; i++) {
+        const float *t_in = dis_tables_float.get() + i * dim123;
+        const float *b_in = nullptr;
+        uint8_t *t_out = dis_tables.get() + i * dim123_2;
+        uint16_t *b_out = nullptr;
+        if (biases_float.get()) {
+            b_in = biases_float.get() + i * nprobe;
+            b_out = biases.get() + i * nprobe;
+        }
+
+        quantize_LUT_and_bias(
+            nprobe, pq.M, pq.ksub, lut_is_3d,
+            t_in, b_in,
+            t_out, M2, b_out,
+            normalizers + 2 * i, normalizers + 2 * i + 1
+        );
+    }
+    IVFFastScan_stats.t_round += get_cy() - t1;
+
+}
+
+
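quantize_LUT_and_bias maps the float tables to uint8 (and biases to uint16) with one affine normalizer pair (a, b) per query, stored in normalizers[2*i] and normalizers[2*i+1] — roughly dis_uint ≈ a * (dis_float - b). The search implementations undo this when converting results back to float:

    float one_a = 1 / normalizers[2 * i];
    float b     = normalizers[2 * i + 1];
    float dis_float = b + dis_uint * one_a;   // as in search_implem_2 below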
+/*********************************************************
+ * Search functions
+ *********************************************************/
+
+template<bool is_max>
+void IndexIVFPQFastScan::search_dispatch_implem(
+        idx_t n,
+        const float* x,
+        idx_t k,
+        float* distances,
+        idx_t* labels) const
+{
+    using Cfloat = typename std::conditional<is_max,
+        CMax<float, int64_t>, CMin<float, int64_t> >::type;
+
+    using C = typename std::conditional<is_max,
+        CMax<uint16_t, int64_t>, CMin<uint16_t, int64_t> >::type;
+
+    if (n == 0) {
+        return;
+    }
+
+    // actual implementation used
+    int impl = implem;
+
+    if (impl == 0) {
+        if (bbs == 32) {
+            impl = 12;
+        } else {
+            impl = 10;
+        }
+        if (k > 20) {
+            impl ++;
+        }
+    }
+
+    if (impl == 1) {
+        search_implem_1<Cfloat>(n, x, k, distances, labels);
+    } else if (impl == 2) {
+        search_implem_2<C>(n, x, k, distances, labels);
+
+    } else if (impl >= 10 && impl <= 13) {
+        size_t ndis = 0, nlist_visited = 0;
+
+        if (n < 2) {
+            if (impl == 12 || impl == 13) {
+                search_implem_12<C>
+                    (n, x, k, distances, labels, impl, &ndis, &nlist_visited);
+            } else {
+                search_implem_10<C>
+                    (n, x, k, distances, labels, impl, &ndis, &nlist_visited);
+            }
+        } else {
+            // explicitly slice over threads
+            int nslice;
+            if (n <= omp_get_max_threads()) {
+                nslice = n;
+            } else if (by_residual && metric_type == METRIC_L2) {
+                // make sure we don't make too big LUT tables
+                size_t lut_size_per_query =
+                    pq.M * pq.ksub * nprobe * (sizeof(float) + sizeof(uint8_t));
+
+                size_t max_lut_size = precomputed_table_max_bytes;
+                // how many queries we can handle within mem budget
+                size_t nq_ok = std::max(max_lut_size / lut_size_per_query, size_t(1));
+                nslice = roundup(std::max(size_t(n / nq_ok), size_t(1)), omp_get_max_threads());
+            } else {
+                // LUTs unlikely to be a limiting factor
+                nslice = omp_get_max_threads();
+            }
+
+#pragma omp parallel for reduction(+: ndis, nlist_visited)
+            for (int slice = 0; slice < nslice; slice++) {
+                idx_t i0 = n * slice / nslice;
+                idx_t i1 = n * (slice + 1) / nslice;
+                float *dis_i = distances + i0 * k;
+                idx_t *lab_i = labels + i0 * k;
+                if (impl == 12 || impl == 13) {
+                    search_implem_12<C>(
+                        i1 - i0, x + i0 * d, k, dis_i, lab_i,
+                        impl, &ndis, &nlist_visited
+                    );
+                } else {
+                    search_implem_10<C>(
+                        i1 - i0, x + i0 * d, k, dis_i, lab_i,
+                        impl, &ndis, &nlist_visited
+                    );
+                }
+            }
+        }
+        indexIVF_stats.nq += n;
+        indexIVF_stats.ndis += ndis;
+        indexIVF_stats.nlist += nlist_visited;
+    } else {
+        FAISS_THROW_FMT("implem %d does not exist", implem);
+    }
+
+}
+
+
+void IndexIVFPQFastScan::search(
+        idx_t n, const float* x, idx_t k,
+        float* distances, idx_t* labels) const
+{
+    if (metric_type == METRIC_L2) {
+        search_dispatch_implem<true>(n, x, k, distances, labels);
+    } else {
+        search_dispatch_implem<false>(n, x, k, distances, labels);
+    }
+}
+
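How the default dispatch resolves, summarized from the code above (implem == 0):

    bbs == 32, k <= 20  ->  implem 12  (queries batched per inverted list, heap handler)
    bbs == 32, k >  20  ->  implem 13  (same batching, reservoir handler)
    bbs != 32, k <= 20  ->  implem 10  (one query at a time, heap handler)
    bbs != 32, k >  20  ->  implem 11  (one query at a time, reservoir handler)

Implementations 1 and 2 below are scalar reference paths, reached only when implem is set explicitly.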
+template<class C>
+void IndexIVFPQFastScan::search_implem_1(
+        idx_t n, const float* x, idx_t k,
+        float* distances, idx_t* labels) const
+{
+    FAISS_THROW_IF_NOT(orig_invlists);
+
+    std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
+    std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
+
+    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+
+    size_t dim12 = pq.ksub * pq.M;
+    AlignedTable<float> dis_tables;
+    AlignedTable<float> biases;
+
+    compute_LUT (
+        n, x,
+        coarse_ids.get(), coarse_dis.get(),
+        dis_tables, biases
+    );
+
+    bool single_LUT = !(by_residual && metric_type == METRIC_L2);
+
+    size_t ndis = 0, nlist_visited = 0;
+
+#pragma omp parallel for reduction(+: ndis, nlist_visited)
+    for(idx_t i = 0; i < n; i++) {
+        int64_t *heap_ids = labels + i * k;
+        float *heap_dis = distances + i * k;
+        heap_heapify<C> (k, heap_dis, heap_ids);
+        float *LUT = nullptr;
+
+        if (single_LUT) {
+            LUT = dis_tables.get() + i * dim12;
+        }
+        for(idx_t j = 0; j < nprobe; j++) {
+            if (!single_LUT) {
+                LUT = dis_tables.get() + (i * nprobe + j) * dim12;
+            }
+            idx_t list_no = coarse_ids[i * nprobe + j];
+            if (list_no < 0) continue;
+            size_t ls = orig_invlists->list_size(list_no);
+            if (ls == 0) continue;
+            InvertedLists::ScopedCodes codes(orig_invlists, list_no);
+            InvertedLists::ScopedIds ids(orig_invlists, list_no);
+
+            float bias = biases.get() ? biases[i * nprobe + j] : 0;
+
+            pq_estimators_from_tables_generic<C>(
+                pq, pq.nbits, codes.get(), ls,
+                LUT, ids.get(), bias,
+                k, heap_dis, heap_ids
+            );
+            nlist_visited ++;
+            ndis ++;
+        }
+        heap_reorder<C> (k, heap_dis, heap_ids);
+    }
+    indexIVF_stats.nq += n;
+    indexIVF_stats.ndis += ndis;
+    indexIVF_stats.nlist += nlist_visited;
+}
+
+template<class C>
+void IndexIVFPQFastScan::search_implem_2(
+        idx_t n, const float* x, idx_t k,
+        float* distances, idx_t* labels) const
+{
+    FAISS_THROW_IF_NOT(orig_invlists);
+
+    std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
+    std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
+
+    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+
+    size_t dim12 = pq.ksub * M2;
+    AlignedTable<uint8_t> dis_tables;
+    AlignedTable<uint16_t> biases;
+    std::unique_ptr<float[]> normalizers(new float[2 * n]);
+
+    compute_LUT_uint8 (
+        n, x,
+        coarse_ids.get(), coarse_dis.get(),
+        dis_tables, biases,
+        normalizers.get()
+    );
+
+
+    bool single_LUT = !(by_residual && metric_type == METRIC_L2);
+
+    size_t ndis = 0, nlist_visited = 0;
+
+#pragma omp parallel for reduction(+: ndis, nlist_visited)
+    for(idx_t i = 0; i < n; i++) {
+        std::vector<uint16_t> tmp_dis(k);
+        int64_t *heap_ids = labels + i * k;
+        uint16_t *heap_dis = tmp_dis.data();
+        heap_heapify<C> (k, heap_dis, heap_ids);
+        const uint8_t *LUT = nullptr;
+
+        if (single_LUT) {
+            LUT = dis_tables.get() + i * dim12;
+        }
+        for(idx_t j = 0; j < nprobe; j++) {
+            if (!single_LUT) {
+                LUT = dis_tables.get() + (i * nprobe + j) * dim12;
+            }
+            idx_t list_no = coarse_ids[i * nprobe + j];
+            if (list_no < 0) continue;
+            size_t ls = orig_invlists->list_size(list_no);
+            if (ls == 0) continue;
+            InvertedLists::ScopedCodes codes(orig_invlists, list_no);
+            InvertedLists::ScopedIds ids(orig_invlists, list_no);
+
+            uint16_t bias = biases.get() ? biases[i * nprobe + j] : 0;
+
+            pq_estimators_from_tables_generic<C>(
+                pq, pq.nbits, codes.get(), ls,
+                LUT, ids.get(), bias,
+                k, heap_dis, heap_ids
+            );
+
+            nlist_visited++;
+            ndis += ls;
+        }
+        heap_reorder<C> (k, heap_dis, heap_ids);
+        // convert distances to float
+        {
+            float one_a = 1 / normalizers[2 * i], b = normalizers[2 * i + 1];
+            if (skip & 16) {
+                one_a = 1;
+                b = 0;
+            }
+            float *heap_dis_float = distances + i * k;
+            for (int j = 0; j < k; j++) {
+                heap_dis_float[j] = b + heap_dis[j] * one_a;
+            }
+        }
+    }
+    indexIVF_stats.nq += n;
+    indexIVF_stats.ndis += ndis;
+    indexIVF_stats.nlist += nlist_visited;
+}
+
+
+
+template<class C>
+void IndexIVFPQFastScan::search_implem_10(
+        idx_t n, const float* x, idx_t k,
+        float* distances, idx_t* labels,
+        int impl, size_t *ndis_out, size_t *nlist_out) const
+{
+    memset(distances, -1, sizeof(float) * k * n);
+    memset(labels, -1, sizeof(idx_t) * k * n);
+
+    using HeapHC = HeapHandler<C, true>;
+    using ReservoirHC = ReservoirHandler<C, true>;
+    using SingleResultHC = SingleResultHandler<C, true>;
+
+
+    std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
+    std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
+
+    uint64_t times[10];
+    memset(times, 0, sizeof(times));
+    int ti = 0;
+#define TIC times[ti++] = get_cy()
+    TIC;
+
+    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+
+    TIC;
+
+    size_t dim12 = pq.ksub * M2;
+    AlignedTable<uint8_t> dis_tables;
+    AlignedTable<uint16_t> biases;
+    std::unique_ptr<float[]> normalizers (new float[2 * n]);
+
+    compute_LUT_uint8 (
+        n, x,
+        coarse_ids.get(), coarse_dis.get(),
+        dis_tables, biases, normalizers.get()
+    );
+
+    TIC;
+
+    bool single_LUT = !(by_residual && metric_type == METRIC_L2);
+
+    TIC;
+    size_t ndis = 0, nlist_visited = 0;
+
+    {
+        AlignedTable<uint16_t> tmp_distances(k);
+        for(idx_t i = 0; i < n; i++) {
+            const uint8_t *LUT = nullptr;
+            int qmap1[1] = {0};
+            std::unique_ptr<SIMDResultHandler<C, true> > handler;
+
+            if (k == 1) {
+                handler.reset(new SingleResultHC(1, 0));
+            } else if (impl == 10) {
+                handler.reset(new HeapHC(1, tmp_distances.get(), labels + i * k, k, 0));
+            } else if (impl == 11) {
+                handler.reset(new ReservoirHC(1, 0, k, 2 * k));
+            } else {
+                FAISS_THROW_MSG("invalid");
+            }
+
+            handler->q_map = qmap1;
+
+            if (single_LUT) {
+                LUT = dis_tables.get() + i * dim12;
+            }
+            for(idx_t j = 0; j < nprobe; j++) {
+                size_t ij = i * nprobe + j;
+                if (!single_LUT) {
+                    LUT = dis_tables.get() + ij * dim12;
+                }
+                if (biases.get()) {
+                    handler->dbias = biases.get() + ij;
+                }
+
+                idx_t list_no = coarse_ids[ij];
+                if (list_no < 0) continue;
+                size_t ls = invlists->list_size(list_no);
+                if (ls == 0) continue;
+
+                InvertedLists::ScopedCodes codes(invlists, list_no);
+                InvertedLists::ScopedIds ids(invlists, list_no);
+
+                handler->ntotal = ls;
+                handler->id_map = ids.get();
+
+#define DISPATCH(classHC) \
+                if(auto *res = dynamic_cast<classHC* > (handler.get())) { \
+                    pq4_accumulate_loop( \
+                        1, roundup(ls, bbs), bbs, M2, \
+                        codes.get(), LUT, \
+                        *res \
+                    ); \
+                }
+                DISPATCH(HeapHC)
+                else DISPATCH(ReservoirHC)
+                else DISPATCH(SingleResultHC)
+#undef DISPATCH
+
+                nlist_visited ++;
+                ndis ++;
+            }
+
+            handler->to_flat_arrays(
+                distances + i * k, labels + i * k,
+                skip & 16 ? nullptr : normalizers.get() + i * 2
+            );
+        }
+    }
+    *ndis_out = ndis;
+    *nlist_out = nlist;
+}
+
+
+
+template<class C>
+void IndexIVFPQFastScan::search_implem_12(
+        idx_t n, const float* x, idx_t k,
+        float* distances, idx_t* labels,
+        int impl, size_t *ndis_out, size_t *nlist_out) const
+{
+    if (n == 0) { // does not work well with reservoir
+        return;
+    }
+    FAISS_THROW_IF_NOT(bbs == 32);
+
+    std::unique_ptr<idx_t[]> coarse_ids(new idx_t[n * nprobe]);
+    std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
+
+    uint64_t times[10];
+    memset(times, 0, sizeof(times));
+    int ti = 0;
+#define TIC times[ti++] = get_cy()
+    TIC;
+
+    quantizer->search (n, x, nprobe, coarse_dis.get(), coarse_ids.get());
+
+    TIC;
+
+    size_t dim12 = pq.ksub * M2;
+    AlignedTable<uint8_t> dis_tables;
+    AlignedTable<uint16_t> biases;
+    std::unique_ptr<float[]> normalizers (new float[2 * n]);
+
+    compute_LUT_uint8 (
+        n, x,
+        coarse_ids.get(), coarse_dis.get(),
+        dis_tables, biases, normalizers.get()
+    );
+
+    TIC;
+
+    struct QC {
+        int qno;     // sequence number of the query
+        int list_no; // list to visit
+        int rank;    // this is the rank'th result of the coarse quantizer
+    };
+    bool single_LUT = !(by_residual && metric_type == METRIC_L2);
+
+    std::vector<QC> qcs;
+    {
+        int ij = 0;
+        for(int i = 0; i < n; i++) {
+            for(int j = 0; j < nprobe; j++) {
+                if (coarse_ids[ij] >= 0) {
+                    qcs.push_back(QC{i, int(coarse_ids[ij]), int(j)});
+                }
+                ij++;
+            }
+        }
+        std::sort(
+            qcs.begin(), qcs.end(),
+            [](const QC &a, const QC & b) {
+                return a.list_no < b.list_no;
+            }
+        );
+    }
+    TIC;
+
+    // prepare the result handlers
+
+    std::unique_ptr<SIMDResultHandler<C, true> > handler;
+    AlignedTable<uint16_t> tmp_distances;
+
+    using HeapHC = HeapHandler<C, true>;
+    using ReservoirHC = ReservoirHandler<C, true>;
+    using SingleResultHC = SingleResultHandler<C, true>;
+
+    if (k == 1) {
+        handler.reset(new SingleResultHC(n, 0));
+    } else if (impl == 12) {
+        tmp_distances.resize(n * k);
+        handler.reset(new HeapHC(n, tmp_distances.get(), labels, k, 0));
+    } else if (impl == 13) {
+        handler.reset(new ReservoirHC(n, 0, k, 2 * k));
+    }
+
+    int qbs2 = this->qbs2 ? this->qbs2 : 11;
+
+    std::vector<uint16_t> tmp_bias;
+    if (biases.get()) {
+        tmp_bias.resize(qbs2);
+        handler->dbias = tmp_bias.data();
+    }
+    TIC;
+
+    size_t ndis = 0;
+
+    size_t i0 = 0;
+    uint64_t t_copy_pack = 0, t_scan = 0;
+    while (i0 < qcs.size()) {
+        uint64_t tt0 = get_cy();
+
+        // find all queries that access this inverted list
+        int list_no = qcs[i0].list_no;
+        size_t i1 = i0 + 1;
+
+        while(i1 < qcs.size() && i1 < i0 + qbs2) {
+            if (qcs[i1].list_no != list_no) {
+                break;
+            }
+            i1++;
+        }
+
+        size_t list_size = invlists->list_size(list_no);
+
+        if (list_size == 0) {
+            i0 = i1;
+            continue;
+        }
+
+        // re-organize LUTs and biases into the right order
+        int nc = i1 - i0;
+
+        std::vector<int> q_map(nc), lut_entries(nc);
+        AlignedTable<uint8_t> LUT(nc * dim12);
+        memset(LUT.get(), -1, nc * dim12);
+        int qbs = pq4_preferred_qbs(nc);
+
+        for(size_t i = i0; i < i1; i++) {
+            const QC & qc = qcs[i];
+            q_map[i - i0] = qc.qno;
+            int ij = qc.qno * nprobe + qc.rank;
+            lut_entries[i - i0] = single_LUT ? qc.qno : ij;
+            if (biases.get()) {
+                tmp_bias[i - i0] = biases[ij];
+            }
+        }
+        pq4_pack_LUT_qbs_q_map(
+            qbs, M2, dis_tables.get(), lut_entries.data(),
+            LUT.get()
+        );
+
+        // access the inverted list
+
+        ndis += (i1 - i0) * list_size;
+
+        InvertedLists::ScopedCodes codes(invlists, list_no);
+        InvertedLists::ScopedIds ids(invlists, list_no);
+
+        // prepare the handler
+
+        handler->ntotal = list_size;
+        handler->q_map = q_map.data();
+        handler->id_map = ids.get();
+        uint64_t tt1 = get_cy();
+
+#define DISPATCH(classHC) \
+        if(auto *res = dynamic_cast<classHC* > (handler.get())) { \
+            pq4_accumulate_loop_qbs( \
+                qbs, list_size, M2, \
+                codes.get(), LUT.get(), \
+                *res \
+            ); \
+        }
+        DISPATCH(HeapHC)
+        else DISPATCH(ReservoirHC)
+        else DISPATCH(SingleResultHC)
+
+        // prepare for next loop
+        i0 = i1;
+
+        uint64_t tt2 = get_cy();
+        t_copy_pack += tt1 - tt0;
+        t_scan += tt2 - tt1;
+    }
+    TIC;
+
+    // labels is in-place for HeapHC
+    handler->to_flat_arrays(
+        distances, labels,
+        skip & 16 ? nullptr : normalizers.get()
+    );
+
+    TIC;
+
+    // these stats are not thread-safe
+
+    for(int i = 1; i < ti; i++) {
+        IVFFastScan_stats.times[i] += times[i] - times[i-1];
+    }
+    IVFFastScan_stats.t_copy_pack += t_copy_pack;
+    IVFFastScan_stats.t_scan += t_scan;
+
+    if (auto *rh = dynamic_cast<ReservoirHC*> (handler.get())) {
+        for (int i = 0; i < 4; i++) {
+            IVFFastScan_stats.reservoir_times[i] += rh->times[i];
+        }
+    }
+
+    *ndis_out = ndis;
+    *nlist_out = nlist;
+
+}
+
+
+IVFFastScanStats IVFFastScan_stats;
+
+} // namespace faiss
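An end-to-end usage sketch of the new index; the dimension, list count, and data pointers are placeholders:

    #include <faiss/IndexFlat.h>
    #include <faiss/IndexIVFPQFastScan.h>

    int main() {
        int d = 64, nlist = 1024, M = 32;
        faiss::IndexFlatL2 coarse(d);
        faiss::IndexIVFPQFastScan index(&coarse, d, nlist, M, 4);
        // index.train(nt, xt);
        // index.add(nb, xb);
        // index.nprobe = 16;
        // index.search(nq, xq, 10, distances, labels);
        return 0;
    }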