faiss 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/faiss/index.cpp +36 -10
- data/ext/faiss/index_binary.cpp +19 -6
- data/ext/faiss/kmeans.cpp +6 -6
- data/ext/faiss/numo.hpp +273 -123
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -3
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +2 -2
- data/vendor/faiss/faiss/Clustering.h +2 -2
- data/vendor/faiss/faiss/IVFlib.cpp +1 -2
- data/vendor/faiss/faiss/IVFlib.h +1 -1
- data/vendor/faiss/faiss/Index.h +10 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexBinary.h +7 -7
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
- data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
- data/vendor/faiss/faiss/IndexFastScan.h +107 -7
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
- data/vendor/faiss/faiss/IndexIDMap.h +6 -6
- data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.h +5 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
- data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -1
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
- data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
- data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
- data/vendor/faiss/faiss/IndexShards.cpp +1 -1
- data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
- data/vendor/faiss/faiss/MetricType.h +1 -1
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +3 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
- data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
- data/vendor/faiss/faiss/impl/HNSW.h +4 -4
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
- data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
- data/vendor/faiss/faiss/impl/io.cpp +2 -2
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
- data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
- data/vendor/faiss/faiss/index_factory.cpp +43 -1
- data/vendor/faiss/faiss/index_factory.h +1 -1
- data/vendor/faiss/faiss/index_io.h +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
- data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +3 -3
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
- data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
- data/vendor/faiss/faiss/utils/distances.h +2 -2
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
- data/vendor/faiss/faiss/utils/hamming.h +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
- data/vendor/faiss/faiss/utils/random.cpp +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +5 -2
- data/vendor/faiss/faiss/utils/utils.h +2 -2
- metadata +14 -3
@@ -27,10 +27,12 @@
 #include <faiss/IndexIVFAdditiveQuantizer.h>
 #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
 #include <faiss/IndexIVFFlat.h>
+#include <faiss/IndexIVFFlatPanorama.h>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/IndexIVFPQFastScan.h>
 #include <faiss/IndexIVFPQR.h>
 #include <faiss/IndexIVFRaBitQ.h>
+#include <faiss/IndexIVFRaBitQFastScan.h>
 #include <faiss/IndexIVFSpectralHash.h>
 #include <faiss/IndexLSH.h>
 #include <faiss/IndexLattice.h>
@@ -39,6 +41,7 @@
 #include <faiss/IndexPQFastScan.h>
 #include <faiss/IndexPreTransform.h>
 #include <faiss/IndexRaBitQ.h>
+#include <faiss/IndexRaBitQFastScan.h>
 #include <faiss/IndexRefine.h>
 #include <faiss/IndexRowwiseMinMax.h>
 #include <faiss/IndexScalarQuantizer.h>
@@ -49,6 +52,9 @@
 #include <faiss/IndexBinaryHNSW.h>
 #include <faiss/IndexBinaryHash.h>
 #include <faiss/IndexBinaryIVF.h>
+#include <faiss/IndexIDMap.h>
+#include <algorithm>
+#include <cctype>
 #include <string>
 
 namespace faiss {
@@ -326,6 +332,10 @@ IndexIVF* parse_IndexIVF(
     if (match("FlatDedup")) {
         return new IndexIVFFlatDedup(get_q(), d, nlist, mt, own_il);
     }
+    if (match("FlatPanorama([0-9]+)?")) {
+        int nlevels = mres_to_int(sm[1], 8); // default to 8 levels
+        return new IndexIVFFlatPanorama(get_q(), d, nlist, nlevels, mt, own_il);
+    }
     if (match(sq_pattern)) {
         return new IndexIVFScalarQuantizer(
                 get_q(),
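The `FlatPanorama` branch above extends the IVF factory grammar with an optional level count (defaulting to 8 when the number is omitted). A minimal sketch of how the new description string would be used through `faiss::index_factory`; the dimension, list count, and data below are illustrative and not taken from this diff:

```c++
#include <faiss/Index.h>
#include <faiss/index_factory.h>

#include <memory>
#include <vector>

int main() {
    int d = 64;        // illustrative vector dimension
    size_t nb = 10000; // illustrative database size

    // "IVF256,FlatPanorama8": 256 inverted lists, Panorama level-oriented
    // storage with 8 levels; the trailing number is optional.
    std::unique_ptr<faiss::Index> index(
            faiss::index_factory(d, "IVF256,FlatPanorama8"));

    std::vector<float> xb(nb * d);
    for (size_t i = 0; i < xb.size(); i++) {
        xb[i] = float(i % 97) / 97.0f; // placeholder data
    }
    index->train(nb, xb.data());
    index->add(nb, xb.data());

    float q[64] = {0.5f}; // remaining components are zero-initialized
    faiss::idx_t labels[5];
    float distances[5];
    index->search(1, q, 5, distances, labels);
    return 0;
}
```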
@@ -450,6 +460,10 @@ IndexIVF* parse_IndexIVF(
     if (match(rabitq_pattern)) {
         return new IndexIVFRaBitQ(get_q(), d, nlist, mt, own_il);
     }
+    if (match("RaBitQfs(_[0-9]+)?")) {
+        int bbs = mres_to_int(sm[1], 32, 1);
+        return new IndexIVFRaBitQFastScan(get_q(), d, nlist, mt, bbs, own_il);
+    }
     return nullptr;
 }
 
@@ -676,6 +690,12 @@ Index* parse_other_indexes(
         return new IndexRaBitQ(d, metric);
     }
 
+    // IndexRaBitQFastScan
+    if (match("RaBitQfs(_[0-9]+)?")) {
+        int bbs = mres_to_int(sm[1], 32, 1);
+        return new IndexRaBitQFastScan(d, metric, bbs);
+    }
+
     return nullptr;
 }
 
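The `RaBitQfs` pattern is registered both in `parse_IndexIVF` and in `parse_other_indexes`, so the new fast-scan RaBitQ indexes should be reachable through a flat or an IVF description string, with an optional `_bbs` suffix (32 by default). A short sketch with illustrative parameters:

```c++
#include <faiss/Index.h>
#include <faiss/index_factory.h>

#include <memory>

int main() {
    int d = 128; // illustrative dimension
    // Flat RaBitQ fast-scan with the default block size (bbs = 32).
    std::unique_ptr<faiss::Index> flat_fs(faiss::index_factory(d, "RaBitQfs"));
    // IVF variant with 1024 lists and bbs = 64.
    std::unique_ptr<faiss::Index> ivf_fs(
            faiss::index_factory(d, "IVF1024,RaBitQfs_64"));
    return 0;
}
```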
@@ -934,6 +954,28 @@ IndexBinary* index_binary_factory(
         bool own_invlists) {
     IndexBinary* index = nullptr;
 
+    std::smatch sm;
+    std::string desc_str(description);
+
+    // Handle IDMap2 and IDMap wrappers (prefix or suffix)
+    if (re_match(desc_str, "(.+),IDMap2", sm) ||
+        re_match(desc_str, "IDMap2,(.+)", sm)) {
+        IndexBinary* sub_index =
+                index_binary_factory(d, sm[1].str().c_str(), own_invlists);
+        IndexBinaryIDMap2* idmap2 = new IndexBinaryIDMap2(sub_index);
+        idmap2->own_fields = true;
+        return idmap2;
+    }
+
+    if (re_match(desc_str, "(.+),IDMap", sm) ||
+        re_match(desc_str, "IDMap,(.+)", sm)) {
+        IndexBinary* sub_index =
+                index_binary_factory(d, sm[1].str().c_str(), own_invlists);
+        IndexBinaryIDMap* idmap = new IndexBinaryIDMap(sub_index);
+        idmap->own_fields = true;
+        return idmap;
+    }
+
     int ncentroids = -1;
     int M, nhash, b;
 
@@ -959,7 +1001,7 @@ IndexBinary* index_binary_factory(
     } else if (sscanf(description, "BHash%d", &b) == 1) {
         index = new IndexBinaryHash(d, b);
 
-    } else if (
+    } else if (desc_str == "BFlat") {
         index = new IndexBinaryFlat(d);
 
     } else {
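With the block above, `index_binary_factory` gains the same `IDMap`/`IDMap2` wrapping that the float factory already supports, as either a prefix or a suffix of the description. A usage sketch (the two-argument call assumes `own_invlists` keeps its default; sizes and ids are illustrative):

```c++
#include <faiss/IndexBinary.h>
#include <faiss/index_factory.h>

#include <memory>
#include <vector>

int main() {
    int d = 256; // bits per vector; binary codes take d / 8 bytes each

    // "BFlat,IDMap2" wraps a flat binary index so that arbitrary 64-bit ids
    // can be assigned and vectors reconstructed by id.
    std::unique_ptr<faiss::IndexBinary> index(
            faiss::index_binary_factory(d, "BFlat,IDMap2"));

    std::vector<uint8_t> codes(2 * d / 8, 0xAB); // two placeholder codes
    faiss::idx_t ids[2] = {1001, 1002};
    index->add_with_ids(2, codes.data(), ids);

    std::vector<uint8_t> recons(d / 8);
    index->reconstruct(1002, recons.data()); // IDMap2 supports lookup by id
    return 0;
}
```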
@@ -16,7 +16,7 @@
  * object that abstracts the medium.
  *
  * The read functions return objects that should be deallocated with
- * delete. All references within these
+ * delete. All references within these objects are owned by the
  * object.
  */
 
@@ -346,6 +346,211 @@ void ArrayInvertedLists::permute_invlists(const idx_t* map) {
 
 ArrayInvertedLists::~ArrayInvertedLists() {}
 
+/***********************************************
+ * ArrayInvertedListsPanorama implementation
+ **********************************************/
+
+ArrayInvertedListsPanorama::ArrayInvertedListsPanorama(
+        size_t nlist,
+        size_t code_size,
+        size_t n_levels)
+        : ArrayInvertedLists(nlist, code_size),
+          n_levels(n_levels),
+          level_width(
+                  (((code_size / sizeof(float)) + n_levels - 1) / n_levels) *
+                  sizeof(float)) {
+    FAISS_THROW_IF_NOT(n_levels > 0);
+    FAISS_THROW_IF_NOT(code_size % sizeof(float) == 0);
+    FAISS_THROW_IF_NOT_MSG(
+            !use_iterator,
+            "IndexIVFFlatPanorama does not support iterators, use vanilla IndexIVFFlat instead");
+    FAISS_ASSERT(level_width % sizeof(float) == 0);
+
+    cum_sums.resize(nlist);
+}
+
+const float* ArrayInvertedListsPanorama::get_cum_sums(size_t list_no) const {
+    assert(list_no < nlist);
+    return cum_sums[list_no].data();
+}
+
+size_t ArrayInvertedListsPanorama::add_entries(
+        size_t list_no,
+        size_t n_entry,
+        const idx_t* ids_in,
+        const uint8_t* code) {
+    assert(list_no < nlist);
+    size_t o = ids[list_no].size();
+
+    ids[list_no].resize(o + n_entry);
+    memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
+
+    size_t new_size = o + n_entry;
+    size_t num_batches = (new_size + kBatchSize - 1) / kBatchSize;
+    codes[list_no].resize(num_batches * kBatchSize * code_size);
+    cum_sums[list_no].resize(num_batches * kBatchSize * (n_levels + 1));
+
+    copy_codes_to_level_layout(list_no, o, n_entry, code);
+    compute_cumulative_sums(list_no, o, n_entry, code);
+
+    return o;
+}
+
+void ArrayInvertedListsPanorama::update_entries(
+        size_t list_no,
+        size_t offset,
+        size_t n_entry,
+        const idx_t* ids_in,
+        const uint8_t* code) {
+    assert(list_no < nlist);
+    assert(n_entry + offset <= ids[list_no].size());
+
+    memcpy(&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
+    copy_codes_to_level_layout(list_no, offset, n_entry, code);
+    compute_cumulative_sums(list_no, offset, n_entry, code);
+}
+
+void ArrayInvertedListsPanorama::resize(size_t list_no, size_t new_size) {
+    ids[list_no].resize(new_size);
+
+    size_t num_batches = (new_size + kBatchSize - 1) / kBatchSize;
+    codes[list_no].resize(num_batches * kBatchSize * code_size);
+    cum_sums[list_no].resize(num_batches * kBatchSize * (n_levels + 1));
+}
+
+const uint8_t* ArrayInvertedListsPanorama::get_single_code(
+        size_t list_no,
+        size_t offset) const {
+    assert(list_no < nlist);
+    assert(offset < ids[list_no].size());
+
+    uint8_t* recons_buffer = new uint8_t[code_size];
+
+    const uint8_t* codes_base = codes[list_no].data();
+
+    size_t batch_no = offset / kBatchSize;
+    size_t pos_in_batch = offset % kBatchSize;
+    size_t batch_offset = batch_no * kBatchSize * code_size;
+
+    for (size_t level = 0; level < n_levels; level++) {
+        size_t level_offset = level * level_width * kBatchSize;
+        const uint8_t* src = codes_base + batch_offset + level_offset +
+                pos_in_batch * level_width;
+        uint8_t* dest = recons_buffer + level * level_width;
+        size_t copy_size =
+                std::min(level_width, code_size - level * level_width);
+        memcpy(dest, src, copy_size);
+    }
+
+    return recons_buffer;
+}
+
+void ArrayInvertedListsPanorama::release_codes(
+        size_t list_no,
+        const uint8_t* codes) const {
+    // Only delete if it's heap-allocated (from get_single_code).
+    // If it's from get_codes (raw storage), it will be codes[list_no].data()
+    if (codes != this->codes[list_no].data()) {
+        delete[] codes;
+    }
+}
+
+InvertedListsIterator* ArrayInvertedListsPanorama::get_iterator(
+        size_t /* list_no */,
+        void* /* inverted_list_context */) const {
+    FAISS_THROW_MSG(
+            "IndexIVFFlatPanorama does not support iterators, use vanilla IndexIVFFlat instead");
+    return nullptr;
+}
+
+void ArrayInvertedListsPanorama::compute_cumulative_sums(
+        size_t list_no,
+        size_t offset,
+        size_t n_entry,
+        const uint8_t* code) {
+    // Cast to float* is safe here as we guarantee codes are always float
+    // vectors for `IndexIVFFlatPanorama` (verified by the constructor).
+    const float* vectors = reinterpret_cast<const float*>(code);
+    const size_t d = code_size / sizeof(float);
+
+    std::vector<float> suffix_sums(d + 1);
+
+    for (size_t entry_idx = 0; entry_idx < n_entry; entry_idx++) {
+        size_t current_pos = offset + entry_idx;
+        size_t batch_no = current_pos / kBatchSize;
+        size_t pos_in_batch = current_pos % kBatchSize;
+
+        const float* vector = vectors + entry_idx * d;
+
+        // Compute suffix sums of squared values.
+        suffix_sums[d] = 0.0f;
+        for (int j = d - 1; j >= 0; j--) {
+            float squared_val = vector[j] * vector[j];
+            suffix_sums[j] = suffix_sums[j + 1] + squared_val;
+        }
+
+        // Store cumulative sums in batch-oriented layout.
+        size_t cumsum_batch_offset = batch_no * kBatchSize * (n_levels + 1);
+        float* cumsum_base = cum_sums[list_no].data();
+
+        const size_t level_width_floats = level_width / sizeof(float);
+        for (size_t level = 0; level < n_levels; level++) {
+            size_t start_idx = level * level_width_floats;
+            size_t cumsum_offset =
+                    cumsum_batch_offset + level * kBatchSize + pos_in_batch;
+            if (start_idx < d) {
+                cumsum_base[cumsum_offset] = sqrt(suffix_sums[start_idx]);
+            } else {
+                cumsum_base[cumsum_offset] = 0.0f;
+            }
+        }
+
+        // Last level sum is always 0.
+        size_t cumsum_offset =
+                cumsum_batch_offset + n_levels * kBatchSize + pos_in_batch;
+        cumsum_base[cumsum_offset] = 0.0f;
+    }
+}
+
+// Helper method to copy codes into level-oriented batch layout at a given
+// offset in the list.
+void ArrayInvertedListsPanorama::copy_codes_to_level_layout(
+        size_t list_no,
+        size_t offset,
+        size_t n_entry,
+        const uint8_t* code) {
+    uint8_t* codes_base = codes[list_no].data();
+    size_t current_pos = offset;
+    for (size_t entry_idx = 0; entry_idx < n_entry;) {
+        // Determine which batch we're in and position within that batch.
+        size_t batch_no = current_pos / kBatchSize;
+        size_t pos_in_batch = current_pos % kBatchSize;
+        size_t entries_in_this_batch =
+                std::min(n_entry - entry_idx, kBatchSize - pos_in_batch);
+
+        // Copy entries into level-oriented layout for this batch.
+        size_t batch_offset = batch_no * kBatchSize * code_size;
+        for (size_t level = 0; level < n_levels; level++) {
+            size_t level_offset = level * level_width * kBatchSize;
+            size_t start_byte = level * level_width;
+            size_t copy_size =
+                    std::min(level_width, code_size - level * level_width);
+
+            for (size_t i = 0; i < entries_in_this_batch; i++) {
+                const uint8_t* src =
+                        code + (entry_idx + i) * code_size + start_byte;
+                uint8_t* dest = codes_base + batch_offset + level_offset +
+                        (pos_in_batch + i) * level_width;
+
+                memcpy(dest, src, copy_size);
+            }
+        }
+
+        entry_idx += entries_in_this_batch;
+        current_pos += entries_in_this_batch;
+    }
+}
+
 /*****************************************************************
  * Meta-inverted list implementations
  *****************************************************************/
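To make the level-oriented layout above easier to follow, here is a standalone restatement of the address arithmetic used by `get_single_code` and `copy_codes_to_level_layout` (not part of the diff; `kBatchSize = 128` as in the new class). Within one inverted list, vectors are grouped in batches of 128; inside a batch, all level-0 chunks are stored contiguously, then all level-1 chunks, and so on.

```c++
#include <cstddef>
#include <cstdio>

// Byte offset, within one inverted list's codes array, of the chunk that
// holds `entry`'s data for `level`. Mirrors the sums computed in the diff.
size_t panorama_chunk_offset(
        size_t entry,       // position of the vector in the list
        size_t level,       // which level chunk we want
        size_t code_size,   // full code size in bytes
        size_t level_width, // bytes per level chunk
        size_t batch_size = 128) {
    size_t batch_no = entry / batch_size;
    size_t pos_in_batch = entry % batch_size;
    return batch_no * batch_size * code_size   // skip the earlier batches
            + level * level_width * batch_size // skip earlier levels in batch
            + pos_in_batch * level_width;      // position inside the level block
}

int main() {
    // Worked example: d = 128 floats (code_size = 512 bytes), 8 levels
    // (level_width = 64 bytes), entry 130 = batch 1, position 2, level 2:
    // 1*128*512 + 2*64*128 + 2*64 = 65536 + 16384 + 128 = 82048.
    printf("%zu\n", panorama_chunk_offset(130, 2, 512, 64));
    return 0;
}
```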
@@ -276,6 +276,68 @@ struct ArrayInvertedLists : InvertedLists {
     ~ArrayInvertedLists() override;
 };
 
+/// Level-oriented storage as defined in the IVFFlat section of Panorama
+/// (https://www.arxiv.org/pdf/2510.00566).
+struct ArrayInvertedListsPanorama : ArrayInvertedLists {
+    static constexpr size_t kBatchSize = 128;
+    std::vector<MaybeOwnedVector<float>> cum_sums;
+    const size_t n_levels;
+    const size_t level_width; // in code units
+
+    ArrayInvertedListsPanorama(size_t nlist, size_t code_size, size_t n_levels);
+
+    const float* get_cum_sums(size_t list_no) const;
+
+    size_t add_entries(
+            size_t list_no,
+            size_t n_entry,
+            const idx_t* ids,
+            const uint8_t* code) override;
+
+    void update_entries(
+            size_t list_no,
+            size_t offset,
+            size_t n_entry,
+            const idx_t* ids,
+            const uint8_t* code) override;
+
+    void resize(size_t list_no, size_t new_size) override;
+
+    /// Panorama's layout make it impractical to support iterators as defined
+    /// by Faiss (i.e. `InvertedListsIterator` API). The iterator would require
+    /// to allocate and reassemble the vector at each call.
+    /// Hence, we override this method to throw an error, this effectively
+    /// disables the `iterate_codes` and `iterate_codes_range` methods.
+    InvertedListsIterator* get_iterator(
+            size_t list_no,
+            void* inverted_list_context = nullptr) const override;
+
+    /// Reconstructs a single code from level-oriented storage to flat format.
+    const uint8_t* get_single_code(size_t list_no, size_t offset)
+            const override;
+
+    /// Frees codes returned by `get_single_code`.
+    void release_codes(size_t list_no, const uint8_t* codes) const override;
+
+   private:
+    /// Helper method to copy codes into level-oriented batch layout at a given
+    /// offset in the list.
+    void copy_codes_to_level_layout(
+            size_t list_no,
+            size_t offset,
+            size_t n_entry,
+            const uint8_t* code);
+
+    /// Helper method to compute the cumulative sums of the codes.
+    /// The cumsums also follow the level-oriented batch layout to minimize the
+    /// number of random memory accesses.
+    void compute_cumulative_sums(
+            size_t list_no,
+            size_t offset,
+            size_t n_entry,
+            const uint8_t* code);
+};
+
 /*****************************************************************
  * Meta-inverted lists
  *
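For the `cum_sums` member declared above, the implementation stores, per batch of `kBatchSize` vectors, `n_levels + 1` contiguous blocks of `kBatchSize` floats: entry `j` of level `l` lives at `batch_no * kBatchSize * (n_levels + 1) + l * kBatchSize + pos_in_batch` and holds the norm of the vector's suffix starting at level `l` (the last block is always zero). A small sketch of the resulting per-list storage sizes, with illustrative numbers only:

```c++
#include <cstddef>
#include <cstdio>

int main() {
    const size_t kBatchSize = 128;
    size_t d = 128;                       // floats per vector (illustrative)
    size_t code_size = d * sizeof(float); // 512 bytes per code
    size_t n_levels = 8;
    size_t list_size = 200;               // vectors currently in one list

    // Both arrays are padded to whole batches, as in resize()/add_entries().
    size_t num_batches = (list_size + kBatchSize - 1) / kBatchSize;   // = 2
    size_t code_bytes = num_batches * kBatchSize * code_size;         // = 131072
    size_t cumsum_floats = num_batches * kBatchSize * (n_levels + 1); // = 2304

    printf("batches=%zu codes=%zu bytes, cum_sums=%zu floats\n",
           num_batches, code_bytes, cumsum_floats);
    return 0;
}
```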
@@ -25,7 +25,7 @@ inline bool is_aligned_pointer(const void* x) {
 }
 
 // class that manages suitably aligned arrays for SIMD
-// T should be a
+// T should be a POD type. The default alignment is 32 for AVX
 template <class T, int A = 32>
 struct AlignedTableTightAlloc {
     T* ptr;
@@ -139,7 +139,7 @@ void HeapArray<C>::per_line_extrema(T* out_val, TI* out_ids) const {
     }
 }
 
-// explicit
+// explicit instantiations
 
 template struct HeapArray<CMin<float, int64_t>>;
 template struct HeapArray<CMax<float, int64_t>>;
@@ -238,7 +238,7 @@ void merge_knn_results(
     }
 }
 
-// explicit
+// explicit instantiations
 #define INSTANTIATE(C, distance_t) \
     template void merge_knn_results<int64_t, C<distance_t, int>>( \
             size_t, \
@@ -150,7 +150,7 @@ inline void heap_replace_top(
     bh_ids[i] = id;
 }
 
-/* Partial
+/* Partial instantiation for heaps with TI = int64_t */
 
 template <typename T>
 inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
@@ -393,7 +393,7 @@ inline void heap_addn(
     }
 }
 
-/* Partial
+/* Partial instantiation for heaps with TI = int64_t */
 
 template <typename T>
 inline void minheap_addn(
@@ -489,7 +489,7 @@ struct HeapArray {
         return val + key * k;
     }
 
-    ///
+    /// Corresponding identifiers
    TI* get_ids(size_t key) {
        return ids + key * k;
    }
@@ -75,7 +75,7 @@ struct Embedding {
 };
 
 /// Feed forward layer that expands to a hidden dimension, applies a ReLU non
-/// linearity and maps back to the
+/// linearity and maps back to the original dimension
 struct FFN {
     Linear linear1, linear2;
 
@@ -103,7 +103,7 @@ struct QINCoStep {
         return residual_blocks[i];
     }
 
-    /** encode a set of vectors x with
+    /** encode a set of vectors x with initial estimate xhat. Optionally return
      * the delta to be added to xhat to form the new xhat */
     nn::Int32Tensor2D encode(
             const nn::Tensor2D& xhat,
@@ -141,7 +141,7 @@ struct QINCo : NeuralNetCodec {
 
     nn::Int32Tensor2D encode(const nn::Tensor2D& x) const override;
 
-    virtual ~QINCo() {}
+    virtual ~QINCo() override {}
 };
 
 } // namespace faiss
@@ -50,8 +50,8 @@
 // for j in range(0, NBUCKETS):
 //   idx = beam * n + i * NBUCKETS + j
 //   if distances[idx] < local_min_distances[j]:
-//     local_min_distances[
-//     local_min_indices[
+//     local_min_distances[j] = distances[idx]
+//     local_min_indices[j] = indices[idx]
 //
 // for j in range(0, NBUCKETS):
 //   heap.push(local_min_distances[j], local_min_indices[j])
@@ -106,7 +106,7 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
                     distance_candidate,
                     _CMP_LE_OS);
 
-            // // blend seems to be slower
+            // // blend seems to be slower than min
             // const __m256 min_distances_new = _mm256_blendv_ps(
             //     distance_candidate,
             //     min_distances_i[j][p],
@@ -120,7 +120,7 @@ struct HeapWithBuckets<CMax<float, int>, NBUCKETS, N> {
                     min_indices_i[j][p]),
                     comparison));
 
-            // // blend seems to be slower
+            // // blend seems to be slower than min
            // const __m256 max_distances_new = _mm256_blendv_ps(
            //     min_distances_i[j][p],
            //     distance_candidate,
@@ -21,7 +21,7 @@
 /// It seems that only the limited number of combinations are
 /// meaningful, because of the limited supply of SIMD registers.
 /// Also, certain combinations, such as B32_D1 and B16_D1, were concluded
-/// to be not very precise in benchmarks, so
+/// to be not very precise in benchmarks, so they were not introduced.
 ///
 /// TODO: Consider d-ary SIMD heap.
 
@@ -324,7 +324,7 @@ void knn_inner_product(
  * vector y, for the L2 distance
  * @param x       query vectors, size nx * d
  * @param y       database vectors, size ny * d
- * @param res     result heap
+ * @param res     result heap structure, which also provides k. Sorted on output
  * @param y_norm2 (optional) norms for the y vectors (nullptr or size ny)
 * @param sel     search in this subset of vectors
 */
@@ -389,7 +389,7 @@ void knn_inner_products_by_idx(
 * @param x         query vectors, size nx * d
 * @param y         database vectors, size (max(ids) + 1) * d
 * @param subset    subset of database vectors to consider, size (nx, nsubset)
-* @param res
+* @param res       result structure
 * @param ld_subset stride for the subset array. -1: use nsubset, 0: all queries
 *                  process the same subset
 */
@@ -5,6 +5,8 @@
 * LICENSE file in the root directory of this source tree.
 */
 
+#pragma once
+
 /** In this file are the implementations of extra metrics beyond L2
 * and inner product */
 
@@ -188,7 +190,7 @@ inline float VectorDistance<METRIC_GOWER>::operator()(
 
 /***************************************************************************
 * Dispatching function that takes a metric type and a consumer object
-* the consumer object should contain a
+* the consumer object should contain a return type T and a operation template
 * function f() that is called to perform the operation. The first argument
 * of the function is the VectorDistance object. The rest are passed in as is.
**************************************************************************/
@@ -257,12 +257,13 @@ void hammings_knn_mc(
 
     std::vector<HCounterState<HammingComputer>> cs;
     for (size_t i = 0; i < na; ++i) {
-        cs.push_back(
-
-
-
-
-
+        cs.push_back(
+                HCounterState<HammingComputer>(
+                        all_counters.data() + i * nBuckets,
+                        all_ids_per_dis.get() + i * nBuckets * k,
+                        a + i * bytes_per_code,
+                        8 * bytes_per_code,
+                        k));
     }
 
     const size_t block_size = hamming_batch_size;
@@ -14,7 +14,7 @@
 * fvecs2bitvecs).
 *
 * User-defined type hamdis_t is used for distances because at this time
-* it is still
+* it is still unclear clear how we will need to balance
 * - flexibility in vector size (may need 16- or even 8-bit vectors)
 * - memory usage
 * - cache-misses when dealing with large volumes of data (fewer bits is better)
@@ -30,8 +30,7 @@ inline int popcount64(uint64_t x) {
 // This table was moved from .cpp to .h file, because
 // otherwise it was causing compilation errors while trying to
 // compile swig modules on Windows.
-
-static constexpr uint8_t hamdis_tab_ham_bytes[256] = {
+inline constexpr uint8_t hamdis_tab_ham_bytes[256] = {
         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
        2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
        2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
@@ -140,7 +140,7 @@ typename C::T partition_fuzzy_median3(
     using T = typename C::T;
 
     // here we use bissection with a median of 3 to find the threshold and
-    // compress the arrays afterwards. So it's a n*log(n)
+    // compress the arrays afterwards. So it's a n*log(n) algorithm rather than
     // qselect's O(n) but it avoids shuffling around the array.
 
     FAISS_THROW_IF_NOT(n >= 3);
@@ -350,7 +350,7 @@ int simd_compress_array(
         }
     }
 
-    // handle remaining, only
+    // handle remaining, only strictly lt ones.
     for (; i0 + 15 < n; i0 += 16) {
         simd16uint16 v(vals + i0);
         simd16uint16 max2 = max_func<C>(v, thr16);
@@ -506,7 +506,7 @@ uint16_t simd_partition_fuzzy_with_bounds(
 
     uint64_t t2 = get_cy();
 
-    partition_stats.
+    partition_stats.bisect_cycles += t1 - t0;
     partition_stats.compress_cycles += t2 - t1;
 
     return thresh;
@@ -662,7 +662,7 @@ uint16_t simd_partition_fuzzy_with_bounds_histogram(
         }
     }
 
-    IFV printf("end
+    IFV printf("end bisection: thresh=%d q=%ld n_eq=%ld\n", thresh, q, n_eq);
 
     if (!C::is_max) {
         if (n_eq == 0) {
@@ -762,7 +762,7 @@ typename C::T partition_fuzzy(
             vals, ids, n, q_min, q_max, q_out);
 }
 
-// explicit template
+// explicit template instantiations
 
 template float partition_fuzzy<CMin<float, int64_t>>(
         float* vals,