faiss 0.2.7 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/ext/faiss/extconf.rb +9 -2
- data/ext/faiss/index.cpp +1 -1
- data/ext/faiss/index_binary.cpp +2 -2
- data/ext/faiss/product_quantizer.cpp +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +7 -7
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +4 -18
- data/vendor/faiss/faiss/Clustering.h +31 -21
- data/vendor/faiss/faiss/IVFlib.cpp +22 -11
- data/vendor/faiss/faiss/Index.cpp +1 -1
- data/vendor/faiss/faiss/Index.h +20 -5
- data/vendor/faiss/faiss/Index2Layer.cpp +7 -7
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +176 -166
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +15 -15
- data/vendor/faiss/faiss/IndexBinary.cpp +9 -4
- data/vendor/faiss/faiss/IndexBinary.h +8 -19
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +24 -31
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +25 -50
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +106 -187
- data/vendor/faiss/faiss/IndexFastScan.cpp +90 -159
- data/vendor/faiss/faiss/IndexFastScan.h +9 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +195 -3
- data/vendor/faiss/faiss/IndexFlat.h +20 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +11 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +3 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +112 -316
- data/vendor/faiss/faiss/IndexHNSW.h +12 -48
- data/vendor/faiss/faiss/IndexIDMap.cpp +69 -28
- data/vendor/faiss/faiss/IndexIDMap.h +24 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +159 -53
- data/vendor/faiss/faiss/IndexIVF.h +37 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +18 -26
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -2
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +19 -46
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +433 -405
- data/vendor/faiss/faiss/IndexIVFFastScan.h +56 -26
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +15 -5
- data/vendor/faiss/faiss/IndexIVFFlat.h +3 -2
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.cpp +172 -0
- data/vendor/faiss/faiss/IndexIVFIndependentQuantizer.h +56 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +78 -122
- data/vendor/faiss/faiss/IndexIVFPQ.h +6 -7
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +18 -50
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +4 -3
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +45 -29
- data/vendor/faiss/faiss/IndexIVFPQR.h +5 -2
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +25 -27
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +6 -6
- data/vendor/faiss/faiss/IndexLSH.cpp +14 -16
- data/vendor/faiss/faiss/IndexNNDescent.cpp +3 -4
- data/vendor/faiss/faiss/IndexNSG.cpp +11 -27
- data/vendor/faiss/faiss/IndexNSG.h +10 -10
- data/vendor/faiss/faiss/IndexPQ.cpp +72 -88
- data/vendor/faiss/faiss/IndexPQ.h +1 -4
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +25 -31
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -19
- data/vendor/faiss/faiss/IndexRefine.h +7 -0
- data/vendor/faiss/faiss/IndexReplicas.cpp +23 -26
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +22 -16
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -4
- data/vendor/faiss/faiss/IndexShards.cpp +21 -29
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +1 -2
- data/vendor/faiss/faiss/MatrixStats.cpp +17 -32
- data/vendor/faiss/faiss/MatrixStats.h +21 -9
- data/vendor/faiss/faiss/MetaIndexes.cpp +35 -35
- data/vendor/faiss/faiss/VectorTransform.cpp +13 -26
- data/vendor/faiss/faiss/VectorTransform.h +7 -7
- data/vendor/faiss/faiss/clone_index.cpp +15 -10
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +87 -4
- data/vendor/faiss/faiss/gpu/GpuCloner.h +22 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +7 -0
- data/vendor/faiss/faiss/gpu/GpuDistance.h +46 -38
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +4 -4
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +8 -9
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +18 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -11
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +24 -3
- data/vendor/faiss/faiss/gpu/GpuResources.h +39 -11
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +117 -17
- data/vendor/faiss/faiss/gpu/StandardGpuResources.h +57 -3
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +25 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +129 -9
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +267 -40
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +299 -208
- data/vendor/faiss/faiss/gpu/test/TestGpuMemoryException.cpp +1 -0
- data/vendor/faiss/faiss/gpu/utils/RaftUtils.h +75 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +5 -5
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +24 -1
- data/vendor/faiss/faiss/impl/FaissException.h +13 -34
- data/vendor/faiss/faiss/impl/HNSW.cpp +321 -70
- data/vendor/faiss/faiss/impl/HNSW.h +9 -8
- data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +29 -19
- data/vendor/faiss/faiss/impl/NSG.h +1 -1
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +14 -12
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +24 -22
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/Quantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +27 -1015
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +5 -63
- data/vendor/faiss/faiss/impl/ResultHandler.h +232 -176
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +444 -104
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -8
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +280 -42
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +21 -14
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +22 -12
- data/vendor/faiss/faiss/impl/index_read.cpp +45 -19
- data/vendor/faiss/faiss/impl/index_write.cpp +60 -41
- data/vendor/faiss/faiss/impl/io.cpp +10 -10
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +18 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +3 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +7 -6
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +52 -38
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +40 -49
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +960 -0
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +176 -0
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +374 -202
- data/vendor/faiss/faiss/index_factory.cpp +10 -7
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +27 -9
- data/vendor/faiss/faiss/invlists/InvertedLists.h +12 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +3 -1
- data/vendor/faiss/faiss/utils/WorkerThread.h +1 -0
- data/vendor/faiss/faiss/utils/distances.cpp +128 -74
- data/vendor/faiss/faiss/utils/distances.h +81 -4
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +1 -1
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +5 -5
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +1 -1
- data/vendor/faiss/faiss/utils/distances_simd.cpp +428 -70
- data/vendor/faiss/faiss/utils/fp16-arm.h +29 -0
- data/vendor/faiss/faiss/utils/fp16.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +162 -110
- data/vendor/faiss/faiss/utils/hamming.h +58 -0
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +16 -89
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +15 -87
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +57 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +14 -104
- data/vendor/faiss/faiss/utils/partitioning.cpp +3 -4
- data/vendor/faiss/faiss/utils/prefetch.h +77 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +0 -14
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +0 -6
- data/vendor/faiss/faiss/utils/simdlib_neon.h +72 -77
- data/vendor/faiss/faiss/utils/sorting.cpp +140 -5
- data/vendor/faiss/faiss/utils/sorting.h +27 -0
- data/vendor/faiss/faiss/utils/utils.cpp +112 -6
- data/vendor/faiss/faiss/utils/utils.h +57 -20
- metadata +11 -4
@@ -33,6 +33,7 @@
|
|
33
33
|
#include <faiss/IndexIVFAdditiveQuantizer.h>
|
34
34
|
#include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
|
35
35
|
#include <faiss/IndexIVFFlat.h>
|
36
|
+
#include <faiss/IndexIVFIndependentQuantizer.h>
|
36
37
|
#include <faiss/IndexIVFPQ.h>
|
37
38
|
#include <faiss/IndexIVFPQFastScan.h>
|
38
39
|
#include <faiss/IndexIVFPQR.h>
|
@@ -291,11 +292,17 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
|
|
291
292
|
aq->set_derived_values();
|
292
293
|
}
|
293
294
|
|
294
|
-
static void read_ResidualQuantizer(
|
295
|
+
static void read_ResidualQuantizer(
|
296
|
+
ResidualQuantizer* rq,
|
297
|
+
IOReader* f,
|
298
|
+
int io_flags) {
|
295
299
|
read_AdditiveQuantizer(rq, f);
|
296
300
|
READ1(rq->train_type);
|
297
301
|
READ1(rq->max_beam_size);
|
298
|
-
if (
|
302
|
+
if ((rq->train_type & ResidualQuantizer::Skip_codebook_tables) ||
|
303
|
+
(io_flags & IO_FLAG_SKIP_PRECOMPUTE_TABLE)) {
|
304
|
+
// don't precompute the tables
|
305
|
+
} else {
|
299
306
|
rq->compute_codebook_tables();
|
300
307
|
}
|
301
308
|
}
|
@@ -324,12 +331,13 @@ static void read_ProductAdditiveQuantizer(
|
|
324
331
|
|
325
332
|
static void read_ProductResidualQuantizer(
|
326
333
|
ProductResidualQuantizer* prq,
|
327
|
-
IOReader* f
|
334
|
+
IOReader* f,
|
335
|
+
int io_flags) {
|
328
336
|
read_ProductAdditiveQuantizer(prq, f);
|
329
337
|
|
330
338
|
for (size_t i = 0; i < prq->nsplits; i++) {
|
331
339
|
auto rq = new ResidualQuantizer();
|
332
|
-
read_ResidualQuantizer(rq, f);
|
340
|
+
read_ResidualQuantizer(rq, f, io_flags);
|
333
341
|
prq->quantizers.push_back(rq);
|
334
342
|
}
|
335
343
|
}
|
@@ -390,15 +398,12 @@ static void read_NSG(NSG* nsg, IOReader* f) {
|
|
390
398
|
graph = std::make_shared<nsg::Graph<int>>(N, R);
|
391
399
|
std::fill_n(graph->data, N * R, EMPTY_ID);
|
392
400
|
|
393
|
-
int size = 0;
|
394
|
-
|
395
401
|
for (int i = 0; i < N; i++) {
|
396
402
|
for (int j = 0; j < R + 1; j++) {
|
397
403
|
int id;
|
398
404
|
READ1(id);
|
399
405
|
if (id != EMPTY_ID) {
|
400
406
|
graph->at(i, j) = id;
|
401
|
-
size += 1;
|
402
407
|
} else {
|
403
408
|
break;
|
404
409
|
}
|
@@ -428,7 +433,7 @@ ProductQuantizer* read_ProductQuantizer(const char* fname) {
|
|
428
433
|
|
429
434
|
ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
|
430
435
|
ProductQuantizer* pq = new ProductQuantizer();
|
431
|
-
|
436
|
+
std::unique_ptr<ProductQuantizer> del(pq);
|
432
437
|
|
433
438
|
read_ProductQuantizer(pq, reader);
|
434
439
|
del.release();
|
@@ -587,7 +592,7 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
587
592
|
READ1(idxp->encode_signs);
|
588
593
|
READ1(idxp->polysemous_ht);
|
589
594
|
}
|
590
|
-
// Old
|
595
|
+
// Old versions of PQ all had metric_type set to INNER_PRODUCT
|
591
596
|
// when they were in fact using L2. Therefore, we force metric type
|
592
597
|
// to L2 when the old format is detected
|
593
598
|
if (h == fourcc("IxPQ") || h == fourcc("IxPo")) {
|
@@ -600,7 +605,7 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
600
605
|
if (h == fourcc("IxRQ")) {
|
601
606
|
read_ResidualQuantizer_old(&idxr->rq, f);
|
602
607
|
} else {
|
603
|
-
read_ResidualQuantizer(&idxr->rq, f);
|
608
|
+
read_ResidualQuantizer(&idxr->rq, f, io_flags);
|
604
609
|
}
|
605
610
|
READ1(idxr->code_size);
|
606
611
|
READVECTOR(idxr->codes);
|
@@ -615,7 +620,7 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
615
620
|
} else if (h == fourcc("IxPR")) {
|
616
621
|
auto idxpr = new IndexProductResidualQuantizer();
|
617
622
|
read_index_header(idxpr, f);
|
618
|
-
read_ProductResidualQuantizer(&idxpr->prq, f);
|
623
|
+
read_ProductResidualQuantizer(&idxpr->prq, f, io_flags);
|
619
624
|
READ1(idxpr->code_size);
|
620
625
|
READVECTOR(idxpr->codes);
|
621
626
|
idx = idxpr;
|
@@ -629,8 +634,13 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
629
634
|
} else if (h == fourcc("ImRQ")) {
|
630
635
|
ResidualCoarseQuantizer* idxr = new ResidualCoarseQuantizer();
|
631
636
|
read_index_header(idxr, f);
|
632
|
-
read_ResidualQuantizer(&idxr->rq, f);
|
637
|
+
read_ResidualQuantizer(&idxr->rq, f, io_flags);
|
633
638
|
READ1(idxr->beam_factor);
|
639
|
+
if (io_flags & IO_FLAG_SKIP_PRECOMPUTE_TABLE) {
|
640
|
+
// then we force the beam factor to -1
|
641
|
+
// which skips the table precomputation.
|
642
|
+
idxr->beam_factor = -1;
|
643
|
+
}
|
634
644
|
idxr->set_beam_factor(idxr->beam_factor);
|
635
645
|
idx = idxr;
|
636
646
|
} else if (
|
@@ -655,13 +665,14 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
655
665
|
if (is_LSQ) {
|
656
666
|
read_LocalSearchQuantizer((LocalSearchQuantizer*)idxaqfs->aq, f);
|
657
667
|
} else if (is_RQ) {
|
658
|
-
read_ResidualQuantizer(
|
668
|
+
read_ResidualQuantizer(
|
669
|
+
(ResidualQuantizer*)idxaqfs->aq, f, io_flags);
|
659
670
|
} else if (is_PLSQ) {
|
660
671
|
read_ProductLocalSearchQuantizer(
|
661
672
|
(ProductLocalSearchQuantizer*)idxaqfs->aq, f);
|
662
673
|
} else {
|
663
674
|
read_ProductResidualQuantizer(
|
664
|
-
(ProductResidualQuantizer*)idxaqfs->aq, f);
|
675
|
+
(ProductResidualQuantizer*)idxaqfs->aq, f, io_flags);
|
665
676
|
}
|
666
677
|
|
667
678
|
READ1(idxaqfs->implem);
|
@@ -703,13 +714,13 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
703
714
|
if (is_LSQ) {
|
704
715
|
read_LocalSearchQuantizer((LocalSearchQuantizer*)ivaqfs->aq, f);
|
705
716
|
} else if (is_RQ) {
|
706
|
-
read_ResidualQuantizer((ResidualQuantizer*)ivaqfs->aq, f);
|
717
|
+
read_ResidualQuantizer((ResidualQuantizer*)ivaqfs->aq, f, io_flags);
|
707
718
|
} else if (is_PLSQ) {
|
708
719
|
read_ProductLocalSearchQuantizer(
|
709
720
|
(ProductLocalSearchQuantizer*)ivaqfs->aq, f);
|
710
721
|
} else {
|
711
722
|
read_ProductResidualQuantizer(
|
712
|
-
(ProductResidualQuantizer*)ivaqfs->aq, f);
|
723
|
+
(ProductResidualQuantizer*)ivaqfs->aq, f, io_flags);
|
713
724
|
}
|
714
725
|
|
715
726
|
READ1(ivaqfs->by_residual);
|
@@ -831,13 +842,13 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
831
842
|
if (is_LSQ) {
|
832
843
|
read_LocalSearchQuantizer((LocalSearchQuantizer*)iva->aq, f);
|
833
844
|
} else if (is_RQ) {
|
834
|
-
read_ResidualQuantizer((ResidualQuantizer*)iva->aq, f);
|
845
|
+
read_ResidualQuantizer((ResidualQuantizer*)iva->aq, f, io_flags);
|
835
846
|
} else if (is_PLSQ) {
|
836
847
|
read_ProductLocalSearchQuantizer(
|
837
848
|
(ProductLocalSearchQuantizer*)iva->aq, f);
|
838
849
|
} else {
|
839
850
|
read_ProductResidualQuantizer(
|
840
|
-
(ProductResidualQuantizer*)iva->aq, f);
|
851
|
+
(ProductResidualQuantizer*)iva->aq, f, io_flags);
|
841
852
|
}
|
842
853
|
READ1(iva->by_residual);
|
843
854
|
READ1(iva->use_precomputed_table);
|
@@ -860,7 +871,22 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
860
871
|
h == fourcc("IvPQ") || h == fourcc("IvQR") || h == fourcc("IwPQ") ||
|
861
872
|
h == fourcc("IwQR")) {
|
862
873
|
idx = read_ivfpq(f, h, io_flags);
|
863
|
-
|
874
|
+
} else if (h == fourcc("IwIQ")) {
|
875
|
+
auto* indep = new IndexIVFIndependentQuantizer();
|
876
|
+
indep->own_fields = true;
|
877
|
+
read_index_header(indep, f);
|
878
|
+
indep->quantizer = read_index(f, io_flags);
|
879
|
+
bool has_vt;
|
880
|
+
READ1(has_vt);
|
881
|
+
if (has_vt) {
|
882
|
+
indep->vt = read_VectorTransform(f);
|
883
|
+
}
|
884
|
+
indep->index_ivf = dynamic_cast<IndexIVF*>(read_index(f, io_flags));
|
885
|
+
FAISS_THROW_IF_NOT(indep->index_ivf);
|
886
|
+
if (auto index_ivfpq = dynamic_cast<IndexIVFPQ*>(indep->index_ivf)) {
|
887
|
+
READ1(index_ivfpq->use_precomputed_table);
|
888
|
+
}
|
889
|
+
idx = indep;
|
864
890
|
} else if (h == fourcc("IxPT")) {
|
865
891
|
IndexPreTransform* ixpt = new IndexPreTransform();
|
866
892
|
ixpt->own_fields = true;
|
@@ -34,6 +34,7 @@
|
|
34
34
|
#include <faiss/IndexIVFAdditiveQuantizer.h>
|
35
35
|
#include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
|
36
36
|
#include <faiss/IndexIVFFlat.h>
|
37
|
+
#include <faiss/IndexIVFIndependentQuantizer.h>
|
37
38
|
#include <faiss/IndexIVFPQ.h>
|
38
39
|
#include <faiss/IndexIVFPQFastScan.h>
|
39
40
|
#include <faiss/IndexIVFPQR.h>
|
@@ -337,13 +338,11 @@ static void write_NSG(const NSG* nsg, IOWriter* f) {
|
|
337
338
|
FAISS_THROW_IF_NOT(K == nsg->R);
|
338
339
|
FAISS_THROW_IF_NOT(true == graph->own_fields);
|
339
340
|
|
340
|
-
int size = 0;
|
341
341
|
for (int i = 0; i < N; i++) {
|
342
342
|
for (int j = 0; j < K; j++) {
|
343
343
|
int id = graph->at(i, j);
|
344
344
|
if (id != EMPTY_ID) {
|
345
345
|
WRITE1(id);
|
346
|
-
size += 1;
|
347
346
|
} else {
|
348
347
|
break;
|
349
348
|
}
|
@@ -385,6 +384,8 @@ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) {
|
|
385
384
|
write_index_header(ivf, f);
|
386
385
|
WRITE1(ivf->nlist);
|
387
386
|
WRITE1(ivf->nprobe);
|
387
|
+
// subclasses write by_residual (some of them support only one setting of
|
388
|
+
// by_residual).
|
388
389
|
write_index(ivf->quantizer, f);
|
389
390
|
write_direct_map(&ivf->direct_map, f);
|
390
391
|
}
|
@@ -430,13 +431,14 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
430
431
|
WRITE1(idxr->code_size);
|
431
432
|
WRITEVECTOR(idxr->codes);
|
432
433
|
} else if (
|
433
|
-
auto*
|
434
|
+
auto* idxr_2 =
|
435
|
+
dynamic_cast<const IndexLocalSearchQuantizer*>(idx)) {
|
434
436
|
uint32_t h = fourcc("IxLS");
|
435
437
|
WRITE1(h);
|
436
438
|
write_index_header(idx, f);
|
437
|
-
write_LocalSearchQuantizer(&
|
438
|
-
WRITE1(
|
439
|
-
WRITEVECTOR(
|
439
|
+
write_LocalSearchQuantizer(&idxr_2->lsq, f);
|
440
|
+
WRITE1(idxr_2->code_size);
|
441
|
+
WRITEVECTOR(idxr_2->codes);
|
440
442
|
} else if (
|
441
443
|
const IndexProductResidualQuantizer* idxpr =
|
442
444
|
dynamic_cast<const IndexProductResidualQuantizer*>(idx)) {
|
@@ -569,26 +571,26 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
569
571
|
|
570
572
|
write_InvertedLists(ivaqfs->invlists, f);
|
571
573
|
} else if (
|
572
|
-
const ResidualCoarseQuantizer*
|
574
|
+
const ResidualCoarseQuantizer* idxr_2 =
|
573
575
|
dynamic_cast<const ResidualCoarseQuantizer*>(idx)) {
|
574
576
|
uint32_t h = fourcc("ImRQ");
|
575
577
|
WRITE1(h);
|
576
578
|
write_index_header(idx, f);
|
577
|
-
write_ResidualQuantizer(&
|
578
|
-
WRITE1(
|
579
|
+
write_ResidualQuantizer(&idxr_2->rq, f);
|
580
|
+
WRITE1(idxr_2->beam_factor);
|
579
581
|
} else if (
|
580
|
-
const Index2Layer*
|
582
|
+
const Index2Layer* idxp_2 = dynamic_cast<const Index2Layer*>(idx)) {
|
581
583
|
uint32_t h = fourcc("Ix2L");
|
582
584
|
WRITE1(h);
|
583
585
|
write_index_header(idx, f);
|
584
|
-
write_index(
|
585
|
-
WRITE1(
|
586
|
-
WRITE1(
|
587
|
-
write_ProductQuantizer(&
|
588
|
-
WRITE1(
|
589
|
-
WRITE1(
|
590
|
-
WRITE1(
|
591
|
-
WRITEVECTOR(
|
586
|
+
write_index(idxp_2->q1.quantizer, f);
|
587
|
+
WRITE1(idxp_2->q1.nlist);
|
588
|
+
WRITE1(idxp_2->q1.quantizer_trains_alone);
|
589
|
+
write_ProductQuantizer(&idxp_2->pq, f);
|
590
|
+
WRITE1(idxp_2->code_size_1);
|
591
|
+
WRITE1(idxp_2->code_size_2);
|
592
|
+
WRITE1(idxp_2->code_size);
|
593
|
+
WRITEVECTOR(idxp_2->codes);
|
592
594
|
} else if (
|
593
595
|
const IndexScalarQuantizer* idxs =
|
594
596
|
dynamic_cast<const IndexScalarQuantizer*>(idx)) {
|
@@ -598,15 +600,16 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
598
600
|
write_ScalarQuantizer(&idxs->sq, f);
|
599
601
|
WRITEVECTOR(idxs->codes);
|
600
602
|
} else if (
|
601
|
-
const IndexLattice*
|
603
|
+
const IndexLattice* idxl_2 =
|
604
|
+
dynamic_cast<const IndexLattice*>(idx)) {
|
602
605
|
uint32_t h = fourcc("IxLa");
|
603
606
|
WRITE1(h);
|
604
|
-
WRITE1(
|
605
|
-
WRITE1(
|
606
|
-
WRITE1(
|
607
|
-
WRITE1(
|
607
|
+
WRITE1(idxl_2->d);
|
608
|
+
WRITE1(idxl_2->nsq);
|
609
|
+
WRITE1(idxl_2->scale_nbit);
|
610
|
+
WRITE1(idxl_2->zn_sphere_codec.r2);
|
608
611
|
write_index_header(idx, f);
|
609
|
-
WRITEVECTOR(
|
612
|
+
WRITEVECTOR(idxl_2->trained);
|
610
613
|
} else if (
|
611
614
|
const IndexIVFFlatDedup* ivfl =
|
612
615
|
dynamic_cast<const IndexIVFFlatDedup*>(idx)) {
|
@@ -625,11 +628,12 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
625
628
|
}
|
626
629
|
write_InvertedLists(ivfl->invlists, f);
|
627
630
|
} else if (
|
628
|
-
const IndexIVFFlat*
|
631
|
+
const IndexIVFFlat* ivfl_2 =
|
632
|
+
dynamic_cast<const IndexIVFFlat*>(idx)) {
|
629
633
|
uint32_t h = fourcc("IwFl");
|
630
634
|
WRITE1(h);
|
631
|
-
write_ivf_header(
|
632
|
-
write_InvertedLists(
|
635
|
+
write_ivf_header(ivfl_2, f);
|
636
|
+
write_InvertedLists(ivfl_2->invlists, f);
|
633
637
|
} else if (
|
634
638
|
const IndexIVFScalarQuantizer* ivsc =
|
635
639
|
dynamic_cast<const IndexIVFScalarQuantizer*>(idx)) {
|
@@ -700,7 +704,22 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
700
704
|
WRITEVECTOR(ivfpqr->refine_codes);
|
701
705
|
WRITE1(ivfpqr->k_factor);
|
702
706
|
}
|
703
|
-
|
707
|
+
} else if (
|
708
|
+
auto* indep =
|
709
|
+
dynamic_cast<const IndexIVFIndependentQuantizer*>(idx)) {
|
710
|
+
uint32_t h = fourcc("IwIQ");
|
711
|
+
WRITE1(h);
|
712
|
+
write_index_header(indep, f);
|
713
|
+
write_index(indep->quantizer, f);
|
714
|
+
bool has_vt = indep->vt != nullptr;
|
715
|
+
WRITE1(has_vt);
|
716
|
+
if (has_vt) {
|
717
|
+
write_VectorTransform(indep->vt, f);
|
718
|
+
}
|
719
|
+
write_index(indep->index_ivf, f);
|
720
|
+
if (auto index_ivfpq = dynamic_cast<IndexIVFPQ*>(indep->index_ivf)) {
|
721
|
+
WRITE1(index_ivfpq->use_precomputed_table);
|
722
|
+
}
|
704
723
|
} else if (
|
705
724
|
const IndexPreTransform* ixpt =
|
706
725
|
dynamic_cast<const IndexPreTransform*>(idx)) {
|
@@ -788,19 +807,19 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
788
807
|
WRITE1(idxpqfs->M2);
|
789
808
|
WRITEVECTOR(idxpqfs->codes);
|
790
809
|
} else if (
|
791
|
-
const IndexIVFPQFastScan*
|
810
|
+
const IndexIVFPQFastScan* ivpq_2 =
|
792
811
|
dynamic_cast<const IndexIVFPQFastScan*>(idx)) {
|
793
812
|
uint32_t h = fourcc("IwPf");
|
794
813
|
WRITE1(h);
|
795
|
-
write_ivf_header(
|
796
|
-
WRITE1(
|
797
|
-
WRITE1(
|
798
|
-
WRITE1(
|
799
|
-
WRITE1(
|
800
|
-
WRITE1(
|
801
|
-
WRITE1(
|
802
|
-
write_ProductQuantizer(&
|
803
|
-
write_InvertedLists(
|
814
|
+
write_ivf_header(ivpq_2, f);
|
815
|
+
WRITE1(ivpq_2->by_residual);
|
816
|
+
WRITE1(ivpq_2->code_size);
|
817
|
+
WRITE1(ivpq_2->bbs);
|
818
|
+
WRITE1(ivpq_2->M2);
|
819
|
+
WRITE1(ivpq_2->implem);
|
820
|
+
WRITE1(ivpq_2->qbs2);
|
821
|
+
write_ProductQuantizer(&ivpq_2->pq, f);
|
822
|
+
write_InvertedLists(ivpq_2->invlists, f);
|
804
823
|
} else if (
|
805
824
|
const IndexRowwiseMinMax* imm =
|
806
825
|
dynamic_cast<const IndexRowwiseMinMax*>(idx)) {
|
@@ -810,13 +829,13 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
810
829
|
write_index_header(imm, f);
|
811
830
|
write_index(imm->index, f);
|
812
831
|
} else if (
|
813
|
-
const IndexRowwiseMinMaxFP16*
|
832
|
+
const IndexRowwiseMinMaxFP16* imm_2 =
|
814
833
|
dynamic_cast<const IndexRowwiseMinMaxFP16*>(idx)) {
|
815
834
|
// IndexRowwiseMinmaxHalf
|
816
835
|
uint32_t h = fourcc("IRMh");
|
817
836
|
WRITE1(h);
|
818
|
-
write_index_header(
|
819
|
-
write_index(
|
837
|
+
write_index_header(imm_2, f);
|
838
|
+
write_index(imm_2->index, f);
|
820
839
|
} else {
|
821
840
|
FAISS_THROW_MSG("don't know how to serialize this type of index");
|
822
841
|
}
|
@@ -196,13 +196,13 @@ size_t BufferedIOWriter::operator()(
|
|
196
196
|
while (size > 0) {
|
197
197
|
assert(b0 == bsz);
|
198
198
|
// now we need to flush to add more bytes
|
199
|
-
size_t
|
199
|
+
size_t ofs_2 = 0;
|
200
200
|
do {
|
201
|
-
assert(
|
202
|
-
size_t written = (*writer)(buffer.data() +
|
201
|
+
assert(ofs_2 < 10000000);
|
202
|
+
size_t written = (*writer)(buffer.data() + ofs_2, 1, bsz - ofs_2);
|
203
203
|
FAISS_THROW_IF_NOT(written > 0);
|
204
|
-
|
205
|
-
} while (
|
204
|
+
ofs_2 += written;
|
205
|
+
} while (ofs_2 != bsz);
|
206
206
|
|
207
207
|
// copy src to buffer
|
208
208
|
size_t nb1 = std::min(bsz, size);
|
@@ -217,12 +217,12 @@ size_t BufferedIOWriter::operator()(
|
|
217
217
|
}
|
218
218
|
|
219
219
|
BufferedIOWriter::~BufferedIOWriter() {
|
220
|
-
size_t
|
221
|
-
while (
|
222
|
-
// printf("Destructor write %zd \n", b0 -
|
223
|
-
size_t written = (*writer)(buffer.data() +
|
220
|
+
size_t ofs_2 = 0;
|
221
|
+
while (ofs_2 != b0) {
|
222
|
+
// printf("Destructor write %zd \n", b0 - ofs_2);
|
223
|
+
size_t written = (*writer)(buffer.data() + ofs_2, 1, b0 - ofs_2);
|
224
224
|
FAISS_THROW_IF_NOT(written > 0);
|
225
|
-
|
225
|
+
ofs_2 += written;
|
226
226
|
}
|
227
227
|
}
|
228
228
|
|
@@ -40,11 +40,13 @@
|
|
40
40
|
|
41
41
|
#include <intrin.h>
|
42
42
|
|
43
|
+
#ifndef __clang__
|
43
44
|
inline int __builtin_ctzll(uint64_t x) {
|
44
45
|
unsigned long ret;
|
45
46
|
_BitScanForward64(&ret, x);
|
46
47
|
return (int)ret;
|
47
48
|
}
|
49
|
+
#endif
|
48
50
|
|
49
51
|
// cudatoolkit provides __builtin_ctz for NVCC >= 11.0
|
50
52
|
#if !defined(__CUDACC__) || __CUDACC_VER_MAJOR__ < 11
|
@@ -55,13 +57,20 @@ inline int __builtin_ctz(unsigned long x) {
|
|
55
57
|
}
|
56
58
|
#endif
|
57
59
|
|
60
|
+
#ifndef __clang__
|
58
61
|
inline int __builtin_clzll(uint64_t x) {
|
59
62
|
return (int)__lzcnt64(x);
|
60
63
|
}
|
64
|
+
#endif
|
61
65
|
|
62
66
|
#define __builtin_popcount __popcnt
|
63
67
|
#define __builtin_popcountl __popcnt64
|
64
68
|
|
69
|
+
#ifndef __clang__
|
70
|
+
#define __m128i_u __m128i
|
71
|
+
#define __m256i_u __m256i
|
72
|
+
#endif
|
73
|
+
|
65
74
|
// MSVC does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit
|
66
75
|
// processors cf.
|
67
76
|
// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
|
@@ -82,6 +91,8 @@ inline int __builtin_clzll(uint64_t x) {
|
|
82
91
|
#define __F16C__ 1
|
83
92
|
#endif
|
84
93
|
|
94
|
+
#define FAISS_ALWAYS_INLINE __forceinline
|
95
|
+
|
85
96
|
#else
|
86
97
|
/*******************************************************
|
87
98
|
* Linux and OSX
|
@@ -92,9 +103,15 @@ inline int __builtin_clzll(uint64_t x) {
|
|
92
103
|
|
93
104
|
// aligned should be *in front* of the declaration, for compatibility with
|
94
105
|
// windows
|
106
|
+
#ifdef SWIG
|
107
|
+
#define ALIGNED(x)
|
108
|
+
#else
|
95
109
|
#define ALIGNED(x) __attribute__((aligned(x)))
|
110
|
+
#endif
|
96
111
|
|
97
|
-
#
|
112
|
+
#define FAISS_ALWAYS_INLINE __attribute__((always_inline)) inline
|
113
|
+
|
114
|
+
#endif
|
98
115
|
|
99
116
|
#if defined(__GNUC__) || defined(__clang__)
|
100
117
|
#define FAISS_DEPRECATED(msg) __attribute__((deprecated(msg)))
|
@@ -24,6 +24,9 @@
|
|
24
24
|
|
25
25
|
namespace faiss {
|
26
26
|
|
27
|
+
struct NormTableScaler;
|
28
|
+
struct SIMDResultHandler;
|
29
|
+
|
27
30
|
/** Pack codes for consumption by the SIMD kernels.
|
28
31
|
* The unused bytes are set to 0.
|
29
32
|
*
|
@@ -117,7 +120,6 @@ void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
|
|
117
120
|
* @param LUT packed look-up table
|
118
121
|
* @param scaler scaler to scale the encoded norm
|
119
122
|
*/
|
120
|
-
template <class ResultHandler, class Scaler>
|
121
123
|
void pq4_accumulate_loop(
|
122
124
|
int nq,
|
123
125
|
size_t nb,
|
@@ -125,8 +127,8 @@ void pq4_accumulate_loop(
|
|
125
127
|
int nsq,
|
126
128
|
const uint8_t* codes,
|
127
129
|
const uint8_t* LUT,
|
128
|
-
|
129
|
-
const
|
130
|
+
SIMDResultHandler& res,
|
131
|
+
const NormTableScaler* scaler);
|
130
132
|
|
131
133
|
/* qbs versions, supported only for bbs=32.
|
132
134
|
*
|
@@ -178,14 +180,13 @@ int pq4_pack_LUT_qbs_q_map(
|
|
178
180
|
* @param res call-back for the results
|
179
181
|
* @param scaler scaler to scale the encoded norm
|
180
182
|
*/
|
181
|
-
template <class ResultHandler, class Scaler>
|
182
183
|
void pq4_accumulate_loop_qbs(
|
183
184
|
int qbs,
|
184
185
|
size_t nb,
|
185
186
|
int nsq,
|
186
187
|
const uint8_t* codes,
|
187
188
|
const uint8_t* LUT,
|
188
|
-
|
189
|
-
const
|
189
|
+
SIMDResultHandler& res,
|
190
|
+
const NormTableScaler* scaler = nullptr);
|
190
191
|
|
191
192
|
} // namespace faiss
|
@@ -125,7 +125,7 @@ void accumulate_fixed_blocks(
|
|
125
125
|
ResultHandler& res,
|
126
126
|
const Scaler& scaler) {
|
127
127
|
constexpr int bbs = 32 * BB;
|
128
|
-
for (
|
128
|
+
for (size_t j0 = 0; j0 < nb; j0 += bbs) {
|
129
129
|
FixedStorageHandler<NQ, 2 * BB> res2;
|
130
130
|
kernel_accumulate_block<NQ, BB>(nsq, codes, LUT, res2, scaler);
|
131
131
|
res.set_block_origin(0, j0);
|
@@ -134,10 +134,8 @@ void accumulate_fixed_blocks(
|
|
134
134
|
}
|
135
135
|
}
|
136
136
|
|
137
|
-
} // anonymous namespace
|
138
|
-
|
139
137
|
template <class ResultHandler, class Scaler>
|
140
|
-
void
|
138
|
+
void pq4_accumulate_loop_fixed_scaler(
|
141
139
|
int nq,
|
142
140
|
size_t nb,
|
143
141
|
int bbs,
|
@@ -172,39 +170,55 @@ void pq4_accumulate_loop(
|
|
172
170
|
#undef DISPATCH
|
173
171
|
}
|
174
172
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
173
|
+
template <class ResultHandler>
|
174
|
+
void pq4_accumulate_loop_fixed_handler(
|
175
|
+
int nq,
|
176
|
+
size_t nb,
|
177
|
+
int bbs,
|
178
|
+
int nsq,
|
179
|
+
const uint8_t* codes,
|
180
|
+
const uint8_t* LUT,
|
181
|
+
ResultHandler& res,
|
182
|
+
const NormTableScaler* scaler) {
|
183
|
+
if (scaler) {
|
184
|
+
pq4_accumulate_loop_fixed_scaler(
|
185
|
+
nq, nb, bbs, nsq, codes, LUT, res, *scaler);
|
186
|
+
} else {
|
187
|
+
DummyScaler dscaler;
|
188
|
+
pq4_accumulate_loop_fixed_scaler(
|
189
|
+
nq, nb, bbs, nsq, codes, LUT, res, dscaler);
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
struct Run_pq4_accumulate_loop {
|
194
|
+
template <class ResultHandler>
|
195
|
+
void f(ResultHandler& res,
|
196
|
+
int nq,
|
197
|
+
size_t nb,
|
198
|
+
int bbs,
|
199
|
+
int nsq,
|
200
|
+
const uint8_t* codes,
|
201
|
+
const uint8_t* LUT,
|
202
|
+
const NormTableScaler* scaler) {
|
203
|
+
pq4_accumulate_loop_fixed_handler(
|
204
|
+
nq, nb, bbs, nsq, codes, LUT, res, scaler);
|
205
|
+
}
|
206
|
+
};
|
207
|
+
|
208
|
+
} // anonymous namespace
|
209
|
+
|
210
|
+
void pq4_accumulate_loop(
|
211
|
+
int nq,
|
212
|
+
size_t nb,
|
213
|
+
int bbs,
|
214
|
+
int nsq,
|
215
|
+
const uint8_t* codes,
|
216
|
+
const uint8_t* LUT,
|
217
|
+
SIMDResultHandler& res,
|
218
|
+
const NormTableScaler* scaler) {
|
219
|
+
Run_pq4_accumulate_loop consumer;
|
220
|
+
dispatch_SIMDResultHanlder(
|
221
|
+
res, consumer, nq, nb, bbs, nsq, codes, LUT, scaler);
|
222
|
+
}
|
209
223
|
|
210
224
|
} // namespace faiss
|