faiss 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +1 -2
- data/vendor/faiss/faiss/Clustering.cpp +39 -22
- data/vendor/faiss/faiss/Clustering.h +40 -21
- data/vendor/faiss/faiss/IVFlib.cpp +26 -12
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +40 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +107 -188
- data/vendor/faiss/faiss/IndexFastScan.cpp +95 -146
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +206 -10
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +170 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +23 -4
- data/vendor/faiss/faiss/IndexHNSW.cpp +231 -382
- data/vendor/faiss/faiss/IndexHNSW.h +62 -49
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +162 -56
- data/vendor/faiss/faiss/IndexIVF.h +46 -6
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +33 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +6 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +502 -401
- data/vendor/faiss/faiss/IndexIVFFastScan.h +63 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +79 -125
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +39 -52
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -33
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +11 -11
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +54 -24
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +25 -17
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +123 -8
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +13 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +30 -12
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +14 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +20 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +142 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +7 -1
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +332 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +26 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +10 -3
- data/vendor/faiss/faiss/impl/DistanceComputer.h +70 -1
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +605 -186
- data/vendor/faiss/faiss/impl/HNSW.h +52 -30
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +11 -9
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +42 -27
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +6 -2
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +347 -172
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1104 -147
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +285 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +74 -34
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +88 -51
- data/vendor/faiss/faiss/impl/io.cpp +23 -15
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +40 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +14 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +487 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +481 -225
- data/vendor/faiss/faiss/index_factory.cpp +41 -20
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +10 -2
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +73 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +26 -8
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +24 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +4 -4
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +147 -123
- data/vendor/faiss/faiss/utils/distances.h +86 -9
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +1589 -243
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +163 -111
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +19 -88
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +77 -79
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +120 -7
- data/vendor/faiss/faiss/utils/utils.h +60 -20
- metadata +23 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
#include <omp.h>
|
|
13
13
|
#include <cstdint>
|
|
14
|
+
#include <memory>
|
|
14
15
|
#include <mutex>
|
|
15
16
|
|
|
16
17
|
#include <algorithm>
|
|
@@ -45,7 +46,7 @@ Level1Quantizer::Level1Quantizer(Index* quantizer, size_t nlist)
|
|
|
45
46
|
cp.niter = 10;
|
|
46
47
|
}
|
|
47
48
|
|
|
48
|
-
Level1Quantizer::Level1Quantizer()
|
|
49
|
+
Level1Quantizer::Level1Quantizer() = default;
|
|
49
50
|
|
|
50
51
|
Level1Quantizer::~Level1Quantizer() {
|
|
51
52
|
if (own_fields) {
|
|
@@ -65,8 +66,8 @@ void Level1Quantizer::train_q1(
|
|
|
65
66
|
} else if (quantizer_trains_alone == 1) {
|
|
66
67
|
if (verbose)
|
|
67
68
|
printf("IVF quantizer trains alone...\n");
|
|
68
|
-
quantizer->train(n, x);
|
|
69
69
|
quantizer->verbose = verbose;
|
|
70
|
+
quantizer->train(n, x);
|
|
70
71
|
FAISS_THROW_IF_NOT_MSG(
|
|
71
72
|
quantizer->ntotal == nlist,
|
|
72
73
|
"nlist not consistent with quantizer size");
|
|
@@ -172,7 +173,7 @@ IndexIVF::IndexIVF(
|
|
|
172
173
|
}
|
|
173
174
|
}
|
|
174
175
|
|
|
175
|
-
IndexIVF::IndexIVF()
|
|
176
|
+
IndexIVF::IndexIVF() = default;
|
|
176
177
|
|
|
177
178
|
void IndexIVF::add(idx_t n, const float* x) {
|
|
178
179
|
add_with_ids(n, x, nullptr);
|
|
@@ -202,7 +203,8 @@ void IndexIVF::add_core(
|
|
|
202
203
|
idx_t n,
|
|
203
204
|
const float* x,
|
|
204
205
|
const idx_t* xids,
|
|
205
|
-
const idx_t* coarse_idx
|
|
206
|
+
const idx_t* coarse_idx,
|
|
207
|
+
void* inverted_list_context) {
|
|
206
208
|
// do some blocking to avoid excessive allocs
|
|
207
209
|
idx_t bs = 65536;
|
|
208
210
|
if (n > bs) {
|
|
@@ -217,7 +219,8 @@ void IndexIVF::add_core(
|
|
|
217
219
|
i1 - i0,
|
|
218
220
|
x + i0 * d,
|
|
219
221
|
xids ? xids + i0 : nullptr,
|
|
220
|
-
coarse_idx + i0
|
|
222
|
+
coarse_idx + i0,
|
|
223
|
+
inverted_list_context);
|
|
221
224
|
}
|
|
222
225
|
return;
|
|
223
226
|
}
|
|
@@ -248,7 +251,10 @@ void IndexIVF::add_core(
|
|
|
248
251
|
if (list_no >= 0 && list_no % nt == rank) {
|
|
249
252
|
idx_t id = xids ? xids[i] : ntotal + i;
|
|
250
253
|
size_t ofs = invlists->add_entry(
|
|
251
|
-
list_no,
|
|
254
|
+
list_no,
|
|
255
|
+
id,
|
|
256
|
+
flat_codes.get() + i * code_size,
|
|
257
|
+
inverted_list_context);
|
|
252
258
|
|
|
253
259
|
dm_adder.add(i, list_no, ofs);
|
|
254
260
|
|
|
@@ -375,7 +381,7 @@ void IndexIVF::search(
|
|
|
375
381
|
indexIVF_stats.add(stats[slice]);
|
|
376
382
|
}
|
|
377
383
|
} else {
|
|
378
|
-
// handle
|
|
384
|
+
// handle parallelization at level below (or don't run in parallel at
|
|
379
385
|
// all)
|
|
380
386
|
sub_search_func(n, x, distances, labels, &indexIVF_stats);
|
|
381
387
|
}
|
|
@@ -438,17 +444,19 @@ void IndexIVF::search_preassigned(
|
|
|
438
444
|
max_codes = unlimited_list_size;
|
|
439
445
|
}
|
|
440
446
|
|
|
441
|
-
bool do_parallel = omp_get_max_threads() >= 2 &&
|
|
447
|
+
[[maybe_unused]] bool do_parallel = omp_get_max_threads() >= 2 &&
|
|
442
448
|
(pmode == 0 ? false
|
|
443
449
|
: pmode == 3 ? n > 1
|
|
444
450
|
: pmode == 1 ? nprobe > 1
|
|
445
451
|
: nprobe * n > 1);
|
|
446
452
|
|
|
453
|
+
void* inverted_list_context =
|
|
454
|
+
params ? params->inverted_list_context : nullptr;
|
|
455
|
+
|
|
447
456
|
#pragma omp parallel if (do_parallel) reduction(+ : nlistv, ndis, nheap)
|
|
448
457
|
{
|
|
449
|
-
InvertedListScanner
|
|
450
|
-
get_InvertedListScanner(store_pairs, sel);
|
|
451
|
-
ScopeDeleter1<InvertedListScanner> del(scanner);
|
|
458
|
+
std::unique_ptr<InvertedListScanner> scanner(
|
|
459
|
+
get_InvertedListScanner(store_pairs, sel));
|
|
452
460
|
|
|
453
461
|
/*****************************************************
|
|
454
462
|
* Depending on parallel_mode, there are two possible ways
|
|
@@ -507,7 +515,7 @@ void IndexIVF::search_preassigned(
|
|
|
507
515
|
nlist);
|
|
508
516
|
|
|
509
517
|
// don't waste time on empty lists
|
|
510
|
-
if (invlists->is_empty(key)) {
|
|
518
|
+
if (invlists->is_empty(key, inverted_list_context)) {
|
|
511
519
|
return (size_t)0;
|
|
512
520
|
}
|
|
513
521
|
|
|
@@ -520,7 +528,7 @@ void IndexIVF::search_preassigned(
|
|
|
520
528
|
size_t list_size = 0;
|
|
521
529
|
|
|
522
530
|
std::unique_ptr<InvertedListsIterator> it(
|
|
523
|
-
invlists->get_iterator(key));
|
|
531
|
+
invlists->get_iterator(key, inverted_list_context));
|
|
524
532
|
|
|
525
533
|
nheap += scanner->iterate_codes(
|
|
526
534
|
it.get(), simi, idxi, k, list_size);
|
|
@@ -539,7 +547,8 @@ void IndexIVF::search_preassigned(
|
|
|
539
547
|
const idx_t* ids = nullptr;
|
|
540
548
|
|
|
541
549
|
if (!store_pairs) {
|
|
542
|
-
sids
|
|
550
|
+
sids = std::make_unique<InvertedLists::ScopedIds>(
|
|
551
|
+
invlists, key);
|
|
543
552
|
ids = sids->get();
|
|
544
553
|
}
|
|
545
554
|
|
|
@@ -659,7 +668,6 @@ void IndexIVF::search_preassigned(
|
|
|
659
668
|
#pragma omp for schedule(dynamic)
|
|
660
669
|
for (int64_t ij = 0; ij < n * nprobe; ij++) {
|
|
661
670
|
size_t i = ij / nprobe;
|
|
662
|
-
size_t j = ij % nprobe;
|
|
663
671
|
|
|
664
672
|
scanner->set_query(x + i * d);
|
|
665
673
|
init_result(local_dis.data(), local_idx.data());
|
|
@@ -696,12 +704,13 @@ void IndexIVF::search_preassigned(
|
|
|
696
704
|
}
|
|
697
705
|
}
|
|
698
706
|
|
|
699
|
-
if (ivf_stats) {
|
|
700
|
-
ivf_stats
|
|
701
|
-
ivf_stats->nlist += nlistv;
|
|
702
|
-
ivf_stats->ndis += ndis;
|
|
703
|
-
ivf_stats->nheap_updates += nheap;
|
|
707
|
+
if (ivf_stats == nullptr) {
|
|
708
|
+
ivf_stats = &indexIVF_stats;
|
|
704
709
|
}
|
|
710
|
+
ivf_stats->nq += n;
|
|
711
|
+
ivf_stats->nlist += nlistv;
|
|
712
|
+
ivf_stats->ndis += ndis;
|
|
713
|
+
ivf_stats->nheap_updates += nheap;
|
|
705
714
|
}
|
|
706
715
|
|
|
707
716
|
void IndexIVF::range_search(
|
|
@@ -775,12 +784,15 @@ void IndexIVF::range_search_preassigned(
|
|
|
775
784
|
|
|
776
785
|
int pmode = this->parallel_mode & ~PARALLEL_MODE_NO_HEAP_INIT;
|
|
777
786
|
// don't start parallel section if single query
|
|
778
|
-
bool do_parallel = omp_get_max_threads() >= 2 &&
|
|
787
|
+
[[maybe_unused]] bool do_parallel = omp_get_max_threads() >= 2 &&
|
|
779
788
|
(pmode == 3 ? false
|
|
780
789
|
: pmode == 0 ? nx > 1
|
|
781
790
|
: pmode == 1 ? nprobe > 1
|
|
782
791
|
: nprobe * nx > 1);
|
|
783
792
|
|
|
793
|
+
void* inverted_list_context =
|
|
794
|
+
params ? params->inverted_list_context : nullptr;
|
|
795
|
+
|
|
784
796
|
#pragma omp parallel if (do_parallel) reduction(+ : nlistv, ndis)
|
|
785
797
|
{
|
|
786
798
|
RangeSearchPartialResult pres(result);
|
|
@@ -802,7 +814,7 @@ void IndexIVF::range_search_preassigned(
|
|
|
802
814
|
ik,
|
|
803
815
|
nlist);
|
|
804
816
|
|
|
805
|
-
if (invlists->is_empty(key)) {
|
|
817
|
+
if (invlists->is_empty(key, inverted_list_context)) {
|
|
806
818
|
return;
|
|
807
819
|
}
|
|
808
820
|
|
|
@@ -811,7 +823,7 @@ void IndexIVF::range_search_preassigned(
|
|
|
811
823
|
scanner->set_list(key, coarse_dis[i * nprobe + ik]);
|
|
812
824
|
if (invlists->use_iterator) {
|
|
813
825
|
std::unique_ptr<InvertedListsIterator> it(
|
|
814
|
-
invlists->get_iterator(key));
|
|
826
|
+
invlists->get_iterator(key, inverted_list_context));
|
|
815
827
|
|
|
816
828
|
scanner->iterate_codes_range(
|
|
817
829
|
it.get(), radius, qres, list_size);
|
|
@@ -891,17 +903,18 @@ void IndexIVF::range_search_preassigned(
|
|
|
891
903
|
}
|
|
892
904
|
}
|
|
893
905
|
|
|
894
|
-
if (stats) {
|
|
895
|
-
stats
|
|
896
|
-
stats->nlist += nlistv;
|
|
897
|
-
stats->ndis += ndis;
|
|
906
|
+
if (stats == nullptr) {
|
|
907
|
+
stats = &indexIVF_stats;
|
|
898
908
|
}
|
|
909
|
+
stats->nq += nx;
|
|
910
|
+
stats->nlist += nlistv;
|
|
911
|
+
stats->ndis += ndis;
|
|
899
912
|
}
|
|
900
913
|
|
|
901
914
|
InvertedListScanner* IndexIVF::get_InvertedListScanner(
|
|
902
915
|
bool /*store_pairs*/,
|
|
903
916
|
const IDSelector* /* sel */) const {
|
|
904
|
-
|
|
917
|
+
FAISS_THROW_MSG("get_InvertedListScanner not implemented");
|
|
905
918
|
}
|
|
906
919
|
|
|
907
920
|
void IndexIVF::reconstruct(idx_t key, float* recons) const {
|
|
@@ -973,14 +986,12 @@ void IndexIVF::search_and_reconstruct(
|
|
|
973
986
|
std::min(nlist, params ? params->nprobe : this->nprobe);
|
|
974
987
|
FAISS_THROW_IF_NOT(nprobe > 0);
|
|
975
988
|
|
|
976
|
-
idx_t
|
|
977
|
-
|
|
978
|
-
float* coarse_dis = new float[n * nprobe];
|
|
979
|
-
ScopeDeleter<float> del2(coarse_dis);
|
|
989
|
+
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
|
|
990
|
+
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
|
|
980
991
|
|
|
981
|
-
quantizer->search(n, x, nprobe, coarse_dis, idx);
|
|
992
|
+
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
|
|
982
993
|
|
|
983
|
-
invlists->prefetch_lists(idx, n * nprobe);
|
|
994
|
+
invlists->prefetch_lists(idx.get(), n * nprobe);
|
|
984
995
|
|
|
985
996
|
// search_preassigned() with `store_pairs` enabled to obtain the list_no
|
|
986
997
|
// and offset into `codes` for reconstruction
|
|
@@ -988,29 +999,94 @@ void IndexIVF::search_and_reconstruct(
|
|
|
988
999
|
n,
|
|
989
1000
|
x,
|
|
990
1001
|
k,
|
|
991
|
-
idx,
|
|
992
|
-
coarse_dis,
|
|
1002
|
+
idx.get(),
|
|
1003
|
+
coarse_dis.get(),
|
|
993
1004
|
distances,
|
|
994
1005
|
labels,
|
|
995
1006
|
true /* store_pairs */,
|
|
996
1007
|
params);
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
+
#pragma omp parallel for if (n * k > 1000)
|
|
1009
|
+
for (idx_t ij = 0; ij < n * k; ij++) {
|
|
1010
|
+
idx_t key = labels[ij];
|
|
1011
|
+
float* reconstructed = recons + ij * d;
|
|
1012
|
+
if (key < 0) {
|
|
1013
|
+
// Fill with NaNs
|
|
1014
|
+
memset(reconstructed, -1, sizeof(*reconstructed) * d);
|
|
1015
|
+
} else {
|
|
1016
|
+
int list_no = lo_listno(key);
|
|
1017
|
+
int offset = lo_offset(key);
|
|
1018
|
+
|
|
1019
|
+
// Update label to the actual id
|
|
1020
|
+
labels[ij] = invlists->get_single_id(list_no, offset);
|
|
1021
|
+
|
|
1022
|
+
reconstruct_from_offset(list_no, offset, reconstructed);
|
|
1023
|
+
}
|
|
1024
|
+
}
|
|
1025
|
+
}
|
|
1026
|
+
|
|
1027
|
+
void IndexIVF::search_and_return_codes(
|
|
1028
|
+
idx_t n,
|
|
1029
|
+
const float* x,
|
|
1030
|
+
idx_t k,
|
|
1031
|
+
float* distances,
|
|
1032
|
+
idx_t* labels,
|
|
1033
|
+
uint8_t* codes,
|
|
1034
|
+
bool include_listno,
|
|
1035
|
+
const SearchParameters* params_in) const {
|
|
1036
|
+
const IVFSearchParameters* params = nullptr;
|
|
1037
|
+
if (params_in) {
|
|
1038
|
+
params = dynamic_cast<const IVFSearchParameters*>(params_in);
|
|
1039
|
+
FAISS_THROW_IF_NOT_MSG(params, "IndexIVF params have incorrect type");
|
|
1040
|
+
}
|
|
1041
|
+
const size_t nprobe =
|
|
1042
|
+
std::min(nlist, params ? params->nprobe : this->nprobe);
|
|
1043
|
+
FAISS_THROW_IF_NOT(nprobe > 0);
|
|
1044
|
+
|
|
1045
|
+
std::unique_ptr<idx_t[]> idx(new idx_t[n * nprobe]);
|
|
1046
|
+
std::unique_ptr<float[]> coarse_dis(new float[n * nprobe]);
|
|
1047
|
+
|
|
1048
|
+
quantizer->search(n, x, nprobe, coarse_dis.get(), idx.get());
|
|
1049
|
+
|
|
1050
|
+
invlists->prefetch_lists(idx.get(), n * nprobe);
|
|
1008
1051
|
|
|
1009
|
-
|
|
1010
|
-
|
|
1052
|
+
// search_preassigned() with `store_pairs` enabled to obtain the list_no
|
|
1053
|
+
// and offset into `codes` for reconstruction
|
|
1054
|
+
search_preassigned(
|
|
1055
|
+
n,
|
|
1056
|
+
x,
|
|
1057
|
+
k,
|
|
1058
|
+
idx.get(),
|
|
1059
|
+
coarse_dis.get(),
|
|
1060
|
+
distances,
|
|
1061
|
+
labels,
|
|
1062
|
+
true /* store_pairs */,
|
|
1063
|
+
params);
|
|
1064
|
+
|
|
1065
|
+
size_t code_size_1 = code_size;
|
|
1066
|
+
if (include_listno) {
|
|
1067
|
+
code_size_1 += coarse_code_size();
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
#pragma omp parallel for if (n * k > 1000)
|
|
1071
|
+
for (idx_t ij = 0; ij < n * k; ij++) {
|
|
1072
|
+
idx_t key = labels[ij];
|
|
1073
|
+
uint8_t* code1 = codes + ij * code_size_1;
|
|
1011
1074
|
|
|
1012
|
-
|
|
1075
|
+
if (key < 0) {
|
|
1076
|
+
// Fill with 0xff
|
|
1077
|
+
memset(code1, -1, code_size_1);
|
|
1078
|
+
} else {
|
|
1079
|
+
int list_no = lo_listno(key);
|
|
1080
|
+
int offset = lo_offset(key);
|
|
1081
|
+
const uint8_t* cc = invlists->get_single_code(list_no, offset);
|
|
1082
|
+
|
|
1083
|
+
labels[ij] = invlists->get_single_id(list_no, offset);
|
|
1084
|
+
|
|
1085
|
+
if (include_listno) {
|
|
1086
|
+
encode_listno(list_no, code1);
|
|
1087
|
+
code1 += code_size_1 - code_size;
|
|
1013
1088
|
}
|
|
1089
|
+
memcpy(code1, cc, code_size);
|
|
1014
1090
|
}
|
|
1015
1091
|
}
|
|
1016
1092
|
}
|
|
@@ -1061,22 +1137,52 @@ void IndexIVF::update_vectors(int n, const idx_t* new_ids, const float* x) {
|
|
|
1061
1137
|
}
|
|
1062
1138
|
|
|
1063
1139
|
void IndexIVF::train(idx_t n, const float* x) {
|
|
1064
|
-
if (verbose)
|
|
1140
|
+
if (verbose) {
|
|
1065
1141
|
printf("Training level-1 quantizer\n");
|
|
1142
|
+
}
|
|
1066
1143
|
|
|
1067
1144
|
train_q1(n, x, verbose, metric_type);
|
|
1068
1145
|
|
|
1069
|
-
if (verbose)
|
|
1146
|
+
if (verbose) {
|
|
1070
1147
|
printf("Training IVF residual\n");
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
// optional subsampling
|
|
1151
|
+
idx_t max_nt = train_encoder_num_vectors();
|
|
1152
|
+
if (max_nt <= 0) {
|
|
1153
|
+
max_nt = (size_t)1 << 35;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
TransformedVectors tv(
|
|
1157
|
+
x, fvecs_maybe_subsample(d, (size_t*)&n, max_nt, x, verbose));
|
|
1158
|
+
|
|
1159
|
+
if (by_residual) {
|
|
1160
|
+
std::vector<idx_t> assign(n);
|
|
1161
|
+
quantizer->assign(n, tv.x, assign.data());
|
|
1162
|
+
|
|
1163
|
+
std::vector<float> residuals(n * d);
|
|
1164
|
+
quantizer->compute_residual_n(n, tv.x, residuals.data(), assign.data());
|
|
1165
|
+
|
|
1166
|
+
train_encoder(n, residuals.data(), assign.data());
|
|
1167
|
+
} else {
|
|
1168
|
+
train_encoder(n, tv.x, nullptr);
|
|
1169
|
+
}
|
|
1071
1170
|
|
|
1072
|
-
train_residual(n, x);
|
|
1073
1171
|
is_trained = true;
|
|
1074
1172
|
}
|
|
1075
1173
|
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1174
|
+
idx_t IndexIVF::train_encoder_num_vectors() const {
|
|
1175
|
+
return 0;
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
void IndexIVF::train_encoder(
|
|
1179
|
+
idx_t /*n*/,
|
|
1180
|
+
const float* /*x*/,
|
|
1181
|
+
const idx_t* assign) {
|
|
1079
1182
|
// does nothing by default
|
|
1183
|
+
if (verbose) {
|
|
1184
|
+
printf("IndexIVF: no residual training\n");
|
|
1185
|
+
}
|
|
1080
1186
|
}
|
|
1081
1187
|
|
|
1082
1188
|
bool check_compatible_for_merge_expensive_check = true;
|
|
@@ -72,6 +72,8 @@ struct SearchParametersIVF : SearchParameters {
|
|
|
72
72
|
size_t nprobe = 1; ///< number of probes at query time
|
|
73
73
|
size_t max_codes = 0; ///< max nb of codes to visit to do a query
|
|
74
74
|
SearchParameters* quantizer_params = nullptr;
|
|
75
|
+
/// context object to pass to InvertedLists
|
|
76
|
+
void* inverted_list_context = nullptr;
|
|
75
77
|
|
|
76
78
|
virtual ~SearchParametersIVF() {}
|
|
77
79
|
};
|
|
@@ -177,6 +179,7 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
177
179
|
bool own_invlists = false;
|
|
178
180
|
|
|
179
181
|
size_t code_size = 0; ///< code size per vector in bytes
|
|
182
|
+
|
|
180
183
|
/** Parallel mode determines how queries are parallelized with OpenMP
|
|
181
184
|
*
|
|
182
185
|
* 0 (default): split over queries
|
|
@@ -194,6 +197,10 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
194
197
|
* enables reconstruct() */
|
|
195
198
|
DirectMap direct_map;
|
|
196
199
|
|
|
200
|
+
/// do the codes in the invlists encode the vectors relative to the
|
|
201
|
+
/// centroids?
|
|
202
|
+
bool by_residual = true;
|
|
203
|
+
|
|
197
204
|
/** The Inverted file takes a quantizer (an Index) on input,
|
|
198
205
|
* which implements the function mapping a vector to a list
|
|
199
206
|
* identifier.
|
|
@@ -207,7 +214,7 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
207
214
|
|
|
208
215
|
void reset() override;
|
|
209
216
|
|
|
210
|
-
/// Trains the quantizer and calls
|
|
217
|
+
/// Trains the quantizer and calls train_encoder to train sub-quantizers
|
|
211
218
|
void train(idx_t n, const float* x) override;
|
|
212
219
|
|
|
213
220
|
/// Calls add_with_ids with NULL ids
|
|
@@ -227,7 +234,8 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
227
234
|
idx_t n,
|
|
228
235
|
const float* x,
|
|
229
236
|
const idx_t* xids,
|
|
230
|
-
const idx_t* precomputed_idx
|
|
237
|
+
const idx_t* precomputed_idx,
|
|
238
|
+
void* inverted_list_context = nullptr);
|
|
231
239
|
|
|
232
240
|
/** Encodes a set of vectors as they would appear in the inverted lists
|
|
233
241
|
*
|
|
@@ -252,9 +260,15 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
252
260
|
*/
|
|
253
261
|
void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
|
|
254
262
|
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
263
|
+
/** Train the encoder for the vectors.
|
|
264
|
+
*
|
|
265
|
+
* If by_residual then it is called with residuals and corresponding assign
|
|
266
|
+
* array, otherwise x is the raw training vectors and assign=nullptr */
|
|
267
|
+
virtual void train_encoder(idx_t n, const float* x, const idx_t* assign);
|
|
268
|
+
|
|
269
|
+
/// can be redefined by subclasses to indicate how many training vectors
|
|
270
|
+
/// they need
|
|
271
|
+
virtual idx_t train_encoder_num_vectors() const;
|
|
258
272
|
|
|
259
273
|
void search_preassigned(
|
|
260
274
|
idx_t n,
|
|
@@ -346,6 +360,24 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
346
360
|
float* recons,
|
|
347
361
|
const SearchParameters* params = nullptr) const override;
|
|
348
362
|
|
|
363
|
+
/** Similar to search, but also returns the codes corresponding to the
|
|
364
|
+
* stored vectors for the search results.
|
|
365
|
+
*
|
|
366
|
+
* @param codes codes (n, k, code_size)
|
|
367
|
+
* @param include_listno
|
|
368
|
+
* include the list ids in the code (in this case add
|
|
369
|
+
* ceil(log8(nlist)) to the code size)
|
|
370
|
+
*/
|
|
371
|
+
void search_and_return_codes(
|
|
372
|
+
idx_t n,
|
|
373
|
+
const float* x,
|
|
374
|
+
idx_t k,
|
|
375
|
+
float* distances,
|
|
376
|
+
idx_t* labels,
|
|
377
|
+
uint8_t* recons,
|
|
378
|
+
bool include_listno = false,
|
|
379
|
+
const SearchParameters* params = nullptr) const;
|
|
380
|
+
|
|
349
381
|
/** Reconstruct a vector given the location in terms of (inv list index +
|
|
350
382
|
* inv list offset) instead of the id.
|
|
351
383
|
*
|
|
@@ -401,6 +433,14 @@ struct IndexIVF : Index, IndexIVFInterface {
|
|
|
401
433
|
|
|
402
434
|
/* The standalone codec interface (except sa_decode that is specific) */
|
|
403
435
|
size_t sa_code_size() const override;
|
|
436
|
+
|
|
437
|
+
/** encode a set of vectors
|
|
438
|
+
* sa_encode will call encode_vector with include_listno=true
|
|
439
|
+
* @param n nb of vectors to encode
|
|
440
|
+
* @param x the vectors to encode
|
|
441
|
+
* @param bytes output array for the codes
|
|
442
|
+
* @return nb of bytes written to codes
|
|
443
|
+
*/
|
|
404
444
|
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
|
|
405
445
|
|
|
406
446
|
IndexIVF();
|
|
@@ -439,7 +479,7 @@ struct InvertedListScanner {
|
|
|
439
479
|
virtual float distance_to_code(const uint8_t* code) const = 0;
|
|
440
480
|
|
|
441
481
|
/** scan a set of codes, compute distances to current query and
|
|
442
|
-
* update heap of results if necessary. Default
|
|
482
|
+
* update heap of results if necessary. Default implementation
|
|
443
483
|
* calls distance_to_code.
|
|
444
484
|
*
|
|
445
485
|
* @param n number of codes to scan
|
|
@@ -37,30 +37,20 @@ IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
|
|
|
37
37
|
IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq)
|
|
38
38
|
: IndexIVF(), aq(aq) {}
|
|
39
39
|
|
|
40
|
-
void IndexIVFAdditiveQuantizer::
|
|
41
|
-
|
|
40
|
+
void IndexIVFAdditiveQuantizer::train_encoder(
|
|
41
|
+
idx_t n,
|
|
42
|
+
const float* x,
|
|
43
|
+
const idx_t* assign) {
|
|
44
|
+
aq->train(n, x);
|
|
45
|
+
}
|
|
42
46
|
|
|
47
|
+
idx_t IndexIVFAdditiveQuantizer::train_encoder_num_vectors() const {
|
|
43
48
|
size_t max_train_points = 1024 * ((size_t)1 << aq->nbits[0]);
|
|
44
49
|
// we need more data to train LSQ
|
|
45
50
|
if (dynamic_cast<LocalSearchQuantizer*>(aq)) {
|
|
46
51
|
max_train_points = 1024 * aq->M * ((size_t)1 << aq->nbits[0]);
|
|
47
52
|
}
|
|
48
|
-
|
|
49
|
-
x = fvecs_maybe_subsample(
|
|
50
|
-
d, (size_t*)&n, max_train_points, x, verbose, 1234);
|
|
51
|
-
ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
|
|
52
|
-
|
|
53
|
-
if (by_residual) {
|
|
54
|
-
std::vector<idx_t> idx(n);
|
|
55
|
-
quantizer->assign(n, x, idx.data());
|
|
56
|
-
|
|
57
|
-
std::vector<float> residuals(n * d);
|
|
58
|
-
quantizer->compute_residual_n(n, x, residuals.data(), idx.data());
|
|
59
|
-
|
|
60
|
-
aq->train(n, residuals.data());
|
|
61
|
-
} else {
|
|
62
|
-
aq->train(n, x);
|
|
63
|
-
}
|
|
53
|
+
return max_train_points;
|
|
64
54
|
}
|
|
65
55
|
|
|
66
56
|
void IndexIVFAdditiveQuantizer::encode_vectors(
|
|
@@ -126,7 +116,22 @@ void IndexIVFAdditiveQuantizer::sa_decode(
|
|
|
126
116
|
}
|
|
127
117
|
}
|
|
128
118
|
|
|
129
|
-
IndexIVFAdditiveQuantizer
|
|
119
|
+
void IndexIVFAdditiveQuantizer::reconstruct_from_offset(
|
|
120
|
+
int64_t list_no,
|
|
121
|
+
int64_t offset,
|
|
122
|
+
float* recons) const {
|
|
123
|
+
const uint8_t* code = invlists->get_single_code(list_no, offset);
|
|
124
|
+
aq->decode(code, recons, 1);
|
|
125
|
+
if (by_residual) {
|
|
126
|
+
std::vector<float> centroid(d);
|
|
127
|
+
quantizer->reconstruct(list_no, centroid.data());
|
|
128
|
+
for (int i = 0; i < d; ++i) {
|
|
129
|
+
recons[i] += centroid[i];
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
IndexIVFAdditiveQuantizer::~IndexIVFAdditiveQuantizer() = default;
|
|
130
135
|
|
|
131
136
|
/*********************************************
|
|
132
137
|
* AQInvertedListScanner
|
|
@@ -159,6 +164,7 @@ struct AQInvertedListScanner : InvertedListScanner {
|
|
|
159
164
|
const float* q;
|
|
160
165
|
/// following codes come from this inverted list
|
|
161
166
|
void set_list(idx_t list_no, float coarse_dis) override {
|
|
167
|
+
this->list_no = list_no;
|
|
162
168
|
if (ia.metric_type == METRIC_L2 && ia.by_residual) {
|
|
163
169
|
ia.quantizer->compute_residual(q0, tmp.data(), list_no);
|
|
164
170
|
q = tmp.data();
|
|
@@ -167,7 +173,7 @@ struct AQInvertedListScanner : InvertedListScanner {
|
|
|
167
173
|
}
|
|
168
174
|
}
|
|
169
175
|
|
|
170
|
-
~AQInvertedListScanner()
|
|
176
|
+
~AQInvertedListScanner() = default;
|
|
171
177
|
};
|
|
172
178
|
|
|
173
179
|
template <bool is_IP>
|
|
@@ -198,7 +204,7 @@ struct AQInvertedListScannerDecompress : AQInvertedListScanner {
|
|
|
198
204
|
: fvec_L2sqr(q, b.data(), aq.d);
|
|
199
205
|
}
|
|
200
206
|
|
|
201
|
-
~AQInvertedListScannerDecompress() override
|
|
207
|
+
~AQInvertedListScannerDecompress() override = default;
|
|
202
208
|
};
|
|
203
209
|
|
|
204
210
|
template <bool is_IP, Search_type_t search_type>
|
|
@@ -241,7 +247,7 @@ struct AQInvertedListScannerLUT : AQInvertedListScanner {
|
|
|
241
247
|
aq.compute_1_distance_LUT<is_IP, search_type>(code, LUT.data());
|
|
242
248
|
}
|
|
243
249
|
|
|
244
|
-
~AQInvertedListScannerLUT() override
|
|
250
|
+
~AQInvertedListScannerLUT() override = default;
|
|
245
251
|
};
|
|
246
252
|
|
|
247
253
|
} // anonymous namespace
|
|
@@ -320,7 +326,7 @@ IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
|
|
|
320
326
|
metric,
|
|
321
327
|
search_type) {}
|
|
322
328
|
|
|
323
|
-
IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer()
|
|
329
|
+
IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer() = default;
|
|
324
330
|
|
|
325
331
|
/**************************************************************************************
|
|
326
332
|
* IndexIVFLocalSearchQuantizer
|
|
@@ -342,7 +348,7 @@ IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer(
|
|
|
342
348
|
IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer()
|
|
343
349
|
: IndexIVFAdditiveQuantizer(&lsq) {}
|
|
344
350
|
|
|
345
|
-
IndexIVFLocalSearchQuantizer::~IndexIVFLocalSearchQuantizer()
|
|
351
|
+
IndexIVFLocalSearchQuantizer::~IndexIVFLocalSearchQuantizer() = default;
|
|
346
352
|
|
|
347
353
|
/**************************************************************************************
|
|
348
354
|
* IndexIVFProductResidualQuantizer
|
|
@@ -365,7 +371,7 @@ IndexIVFProductResidualQuantizer::IndexIVFProductResidualQuantizer(
|
|
|
365
371
|
IndexIVFProductResidualQuantizer::IndexIVFProductResidualQuantizer()
|
|
366
372
|
: IndexIVFAdditiveQuantizer(&prq) {}
|
|
367
373
|
|
|
368
|
-
IndexIVFProductResidualQuantizer::~IndexIVFProductResidualQuantizer()
|
|
374
|
+
IndexIVFProductResidualQuantizer::~IndexIVFProductResidualQuantizer() = default;
|
|
369
375
|
|
|
370
376
|
/**************************************************************************************
|
|
371
377
|
* IndexIVFProductLocalSearchQuantizer
|
|
@@ -388,6 +394,7 @@ IndexIVFProductLocalSearchQuantizer::IndexIVFProductLocalSearchQuantizer(
|
|
|
388
394
|
IndexIVFProductLocalSearchQuantizer::IndexIVFProductLocalSearchQuantizer()
|
|
389
395
|
: IndexIVFAdditiveQuantizer(&plsq) {}
|
|
390
396
|
|
|
391
|
-
IndexIVFProductLocalSearchQuantizer::~IndexIVFProductLocalSearchQuantizer()
|
|
397
|
+
IndexIVFProductLocalSearchQuantizer::~IndexIVFProductLocalSearchQuantizer() =
|
|
398
|
+
default;
|
|
392
399
|
|
|
393
400
|
} // namespace faiss
|
|
@@ -26,7 +26,6 @@ namespace faiss {
|
|
|
26
26
|
struct IndexIVFAdditiveQuantizer : IndexIVF {
|
|
27
27
|
// the quantizer
|
|
28
28
|
AdditiveQuantizer* aq;
|
|
29
|
-
bool by_residual = true;
|
|
30
29
|
int use_precomputed_table = 0; // for future use
|
|
31
30
|
|
|
32
31
|
using Search_type_t = AdditiveQuantizer::Search_type_t;
|
|
@@ -40,7 +39,9 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
|
|
|
40
39
|
|
|
41
40
|
explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
|
|
42
41
|
|
|
43
|
-
void
|
|
42
|
+
void train_encoder(idx_t n, const float* x, const idx_t* assign) override;
|
|
43
|
+
|
|
44
|
+
idx_t train_encoder_num_vectors() const override;
|
|
44
45
|
|
|
45
46
|
void encode_vectors(
|
|
46
47
|
idx_t n,
|
|
@@ -55,6 +56,9 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
|
|
|
55
56
|
|
|
56
57
|
void sa_decode(idx_t n, const uint8_t* codes, float* x) const override;
|
|
57
58
|
|
|
59
|
+
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
|
|
60
|
+
const override;
|
|
61
|
+
|
|
58
62
|
~IndexIVFAdditiveQuantizer() override;
|
|
59
63
|
};
|
|
60
64
|
|