faiss 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +2 -0
- data/ext/faiss/index.cpp +8 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/IVFlib.cpp +25 -49
- data/vendor/faiss/faiss/Index.cpp +11 -0
- data/vendor/faiss/faiss/Index.h +24 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexFastScan.cpp +1 -1
- data/vendor/faiss/faiss/IndexFastScan.h +3 -8
- data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
- data/vendor/faiss/faiss/IndexFlat.h +80 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +90 -1
- data/vendor/faiss/faiss/IndexHNSW.h +57 -1
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +34 -149
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +86 -2
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +3 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +293 -115
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +52 -16
- data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
- data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -16
- data/vendor/faiss/faiss/IndexRaBitQ.h +5 -1
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +238 -93
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +35 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
- data/vendor/faiss/faiss/IndexRefine.h +17 -0
- data/vendor/faiss/faiss/clone_index.cpp +2 -0
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +74 -3
- data/vendor/faiss/faiss/impl/HNSW.cpp +294 -15
- data/vendor/faiss/faiss/impl/HNSW.h +31 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +3 -3
- data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
- data/vendor/faiss/faiss/impl/Panorama.h +204 -0
- data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
- data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +54 -6
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +183 -6
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +269 -84
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +71 -4
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +6 -9
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -3
- data/vendor/faiss/faiss/impl/index_read.cpp +156 -12
- data/vendor/faiss/faiss/impl/index_write.cpp +142 -19
- data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
- data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
- data/vendor/faiss/faiss/impl/svs_io.h +67 -0
- data/vendor/faiss/faiss/index_factory.cpp +182 -15
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +18 -109
- data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -18
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
- data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
- data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
- data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
- data/vendor/faiss/faiss/utils/distances.cpp +0 -3
- data/vendor/faiss/faiss/utils/utils.cpp +4 -0
- metadata +18 -1
|
@@ -11,12 +11,15 @@
|
|
|
11
11
|
|
|
12
12
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
13
13
|
#include <faiss/impl/FaissAssert.h>
|
|
14
|
+
#include <faiss/impl/ResultHandler.h>
|
|
14
15
|
#include <faiss/utils/Heap.h>
|
|
15
16
|
#include <faiss/utils/distances.h>
|
|
16
17
|
#include <faiss/utils/extra_distances.h>
|
|
17
18
|
#include <faiss/utils/prefetch.h>
|
|
18
19
|
#include <faiss/utils/sorting.h>
|
|
20
|
+
#include <omp.h>
|
|
19
21
|
#include <cstring>
|
|
22
|
+
#include <numeric>
|
|
20
23
|
|
|
21
24
|
namespace faiss {
|
|
22
25
|
|
|
@@ -100,15 +103,24 @@ namespace {
|
|
|
100
103
|
struct FlatL2Dis : FlatCodesDistanceComputer {
|
|
101
104
|
size_t d;
|
|
102
105
|
idx_t nb;
|
|
103
|
-
const float* q;
|
|
104
106
|
const float* b;
|
|
105
107
|
size_t ndis;
|
|
108
|
+
size_t npartial_dot_products;
|
|
106
109
|
|
|
107
110
|
float distance_to_code(const uint8_t* code) final {
|
|
108
111
|
ndis++;
|
|
109
112
|
return fvec_L2sqr(q, (float*)code, d);
|
|
110
113
|
}
|
|
111
114
|
|
|
115
|
+
float partial_dot_product(
|
|
116
|
+
const idx_t i,
|
|
117
|
+
const uint32_t offset,
|
|
118
|
+
const uint32_t num_components) final override {
|
|
119
|
+
npartial_dot_products++;
|
|
120
|
+
return fvec_inner_product(
|
|
121
|
+
q + offset, b + i * d + offset, num_components);
|
|
122
|
+
}
|
|
123
|
+
|
|
112
124
|
float symmetric_dis(idx_t i, idx_t j) override {
|
|
113
125
|
return fvec_L2sqr(b + j * d, b + i * d, d);
|
|
114
126
|
}
|
|
@@ -116,12 +128,13 @@ struct FlatL2Dis : FlatCodesDistanceComputer {
|
|
|
116
128
|
explicit FlatL2Dis(const IndexFlat& storage, const float* q = nullptr)
|
|
117
129
|
: FlatCodesDistanceComputer(
|
|
118
130
|
storage.codes.data(),
|
|
119
|
-
storage.code_size
|
|
131
|
+
storage.code_size,
|
|
132
|
+
q),
|
|
120
133
|
d(storage.d),
|
|
121
134
|
nb(storage.ntotal),
|
|
122
|
-
q(q),
|
|
123
135
|
b(storage.get_xb()),
|
|
124
|
-
ndis(0)
|
|
136
|
+
ndis(0),
|
|
137
|
+
npartial_dot_products(0) {}
|
|
125
138
|
|
|
126
139
|
void set_query(const float* x) override {
|
|
127
140
|
q = x;
|
|
@@ -159,6 +172,50 @@ struct FlatL2Dis : FlatCodesDistanceComputer {
|
|
|
159
172
|
dis2 = dp2;
|
|
160
173
|
dis3 = dp3;
|
|
161
174
|
}
|
|
175
|
+
|
|
176
|
+
void partial_dot_product_batch_4(
|
|
177
|
+
const idx_t idx0,
|
|
178
|
+
const idx_t idx1,
|
|
179
|
+
const idx_t idx2,
|
|
180
|
+
const idx_t idx3,
|
|
181
|
+
float& dp0,
|
|
182
|
+
float& dp1,
|
|
183
|
+
float& dp2,
|
|
184
|
+
float& dp3,
|
|
185
|
+
const uint32_t offset,
|
|
186
|
+
const uint32_t num_components) final override {
|
|
187
|
+
npartial_dot_products += 4;
|
|
188
|
+
|
|
189
|
+
// compute first, assign next
|
|
190
|
+
const float* __restrict y0 =
|
|
191
|
+
reinterpret_cast<const float*>(codes + idx0 * code_size);
|
|
192
|
+
const float* __restrict y1 =
|
|
193
|
+
reinterpret_cast<const float*>(codes + idx1 * code_size);
|
|
194
|
+
const float* __restrict y2 =
|
|
195
|
+
reinterpret_cast<const float*>(codes + idx2 * code_size);
|
|
196
|
+
const float* __restrict y3 =
|
|
197
|
+
reinterpret_cast<const float*>(codes + idx3 * code_size);
|
|
198
|
+
|
|
199
|
+
float dp0_ = 0;
|
|
200
|
+
float dp1_ = 0;
|
|
201
|
+
float dp2_ = 0;
|
|
202
|
+
float dp3_ = 0;
|
|
203
|
+
fvec_inner_product_batch_4(
|
|
204
|
+
q + offset,
|
|
205
|
+
y0 + offset,
|
|
206
|
+
y1 + offset,
|
|
207
|
+
y2 + offset,
|
|
208
|
+
y3 + offset,
|
|
209
|
+
num_components,
|
|
210
|
+
dp0_,
|
|
211
|
+
dp1_,
|
|
212
|
+
dp2_,
|
|
213
|
+
dp3_);
|
|
214
|
+
dp0 = dp0_;
|
|
215
|
+
dp1 = dp1_;
|
|
216
|
+
dp2 = dp2_;
|
|
217
|
+
dp3 = dp3_;
|
|
218
|
+
}
|
|
162
219
|
};
|
|
163
220
|
|
|
164
221
|
struct FlatIPDis : FlatCodesDistanceComputer {
|
|
@@ -519,4 +576,317 @@ void IndexFlat1D::search(
|
|
|
519
576
|
done:;
|
|
520
577
|
}
|
|
521
578
|
}
|
|
579
|
+
|
|
580
|
+
/**************************************************************
|
|
581
|
+
* shared flat Panorama search code
|
|
582
|
+
**************************************************************/
|
|
583
|
+
|
|
584
|
+
namespace {
|
|
585
|
+
|
|
586
|
+
template <bool use_radius, typename BlockHandler>
|
|
587
|
+
inline void flat_pano_search_core(
|
|
588
|
+
const IndexFlatPanorama& index,
|
|
589
|
+
BlockHandler& handler,
|
|
590
|
+
idx_t n,
|
|
591
|
+
const float* x,
|
|
592
|
+
float radius,
|
|
593
|
+
const SearchParameters* params) {
|
|
594
|
+
using SingleResultHandler = typename BlockHandler::SingleResultHandler;
|
|
595
|
+
|
|
596
|
+
IDSelector* sel = params ? params->sel : nullptr;
|
|
597
|
+
bool use_sel = sel != nullptr;
|
|
598
|
+
|
|
599
|
+
[[maybe_unused]] int nt = std::min(int(n), omp_get_max_threads());
|
|
600
|
+
size_t n_batches = (index.ntotal + index.batch_size - 1) / index.batch_size;
|
|
601
|
+
|
|
602
|
+
#pragma omp parallel num_threads(nt)
|
|
603
|
+
{
|
|
604
|
+
SingleResultHandler res(handler);
|
|
605
|
+
|
|
606
|
+
std::vector<float> query_cum_norms(index.n_levels + 1);
|
|
607
|
+
std::vector<float> exact_distances(index.batch_size);
|
|
608
|
+
std::vector<uint32_t> active_indices(index.batch_size);
|
|
609
|
+
|
|
610
|
+
#pragma omp for
|
|
611
|
+
for (int64_t i = 0; i < n; i++) {
|
|
612
|
+
const float* xi = x + i * index.d;
|
|
613
|
+
index.pano.compute_query_cum_sums(xi, query_cum_norms.data());
|
|
614
|
+
|
|
615
|
+
PanoramaStats local_stats;
|
|
616
|
+
local_stats.reset();
|
|
617
|
+
|
|
618
|
+
res.begin(i);
|
|
619
|
+
|
|
620
|
+
for (size_t batch_no = 0; batch_no < n_batches; batch_no++) {
|
|
621
|
+
size_t batch_start = batch_no * index.batch_size;
|
|
622
|
+
|
|
623
|
+
float threshold;
|
|
624
|
+
if constexpr (use_radius) {
|
|
625
|
+
threshold = radius;
|
|
626
|
+
} else {
|
|
627
|
+
threshold = res.heap_dis[0];
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
size_t num_active =
|
|
631
|
+
index.pano
|
|
632
|
+
.progressive_filter_batch<CMax<float, int64_t>>(
|
|
633
|
+
index.codes.data(),
|
|
634
|
+
index.cum_sums.data(),
|
|
635
|
+
xi,
|
|
636
|
+
query_cum_norms.data(),
|
|
637
|
+
batch_no,
|
|
638
|
+
index.ntotal,
|
|
639
|
+
sel,
|
|
640
|
+
nullptr,
|
|
641
|
+
use_sel,
|
|
642
|
+
active_indices,
|
|
643
|
+
exact_distances,
|
|
644
|
+
threshold,
|
|
645
|
+
local_stats);
|
|
646
|
+
|
|
647
|
+
for (size_t j = 0; j < num_active; j++) {
|
|
648
|
+
res.add_result(
|
|
649
|
+
exact_distances[active_indices[j]],
|
|
650
|
+
batch_start + active_indices[j]);
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
res.end();
|
|
655
|
+
indexPanorama_stats.add(local_stats);
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
} // anonymous namespace
|
|
661
|
+
|
|
662
|
+
/***************************************************
|
|
663
|
+
* IndexFlatPanorama
|
|
664
|
+
***************************************************/
|
|
665
|
+
|
|
666
|
+
/**
 * Append n vectors to the index.
 *
 * Storage is grown to a whole number of batches, then the vectors are copied
 * into the Panorama level layout and their cumulative sums are computed.
 *
 * @param n  number of vectors to add, must be >= 0
 * @param x  input vectors; passed to Panorama both as raw bytes (codes) and
 *           as floats (cumulative sums)
 */
void IndexFlatPanorama::add(idx_t n, const float* x) {
    FAISS_THROW_IF_NOT(n >= 0);

    const size_t offset = ntotal;
    const size_t new_total = offset + static_cast<size_t>(n);
    const size_t num_batches = (new_total + batch_size - 1) / batch_size;

    // Grow the storage first and only then publish the new ntotal, so that a
    // throwing allocation does not leave ntotal ahead of the buffers.
    codes.resize(num_batches * batch_size * code_size);
    cum_sums.resize(num_batches * batch_size * (n_levels + 1));
    ntotal = static_cast<idx_t>(new_total);

    const uint8_t* code = reinterpret_cast<const uint8_t*>(x);
    pano.copy_codes_to_level_layout(codes.data(), offset, n, code);
    pano.compute_cumulative_sums(cum_sums.data(), offset, n, x);
}
|
|
678
|
+
|
|
679
|
+
/**
 * k-nearest-neighbor search.
 *
 * Requires k > 0 and batch_size >= k. Results are collected through a
 * HeapBlockResultHandler writing into `distances` / `labels`, and the scan
 * itself is delegated to flat_pano_search_core in non-radius mode.
 *
 * @param n          number of query vectors
 * @param x          query vectors
 * @param k          number of results requested per query
 * @param distances  output distances
 * @param labels     output labels
 * @param params     optional search parameters, forwarded unchanged
 */
void IndexFlatPanorama::search(
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        idx_t* labels,
        const SearchParameters* params) const {
    using KnnHandler = HeapBlockResultHandler<CMax<float, int64_t>, false>;

    FAISS_THROW_IF_NOT(k > 0);
    FAISS_THROW_IF_NOT(batch_size >= k);

    const size_t n_queries = static_cast<size_t>(n);
    const size_t n_results = static_cast<size_t>(k);
    KnnHandler knn_handler(n_queries, distances, labels, n_results, nullptr);

    // The radius argument is not read when use_radius is false.
    constexpr float unused_radius = 0.0f;
    flat_pano_search_core<false>(
            *this, knn_handler, n, x, unused_radius, params);
}
|
|
694
|
+
|
|
695
|
+
/**
 * Range search: results are collected through a
 * RangeSearchBlockResultHandler built from `result` and `radius`, and the
 * scan is delegated to flat_pano_search_core in radius mode.
 *
 * @param n       number of query vectors
 * @param x       query vectors
 * @param radius  search radius, also used as the filtering threshold
 * @param result  output structure receiving the matches
 * @param params  optional search parameters, forwarded unchanged
 */
void IndexFlatPanorama::range_search(
        idx_t n,
        const float* x,
        float radius,
        RangeSearchResult* result,
        const SearchParameters* params) const {
    using RangeHandler =
            RangeSearchBlockResultHandler<CMax<float, int64_t>, false>;

    RangeHandler range_handler(result, radius, nullptr);

    constexpr bool kUseRadius = true;
    flat_pano_search_core<kUseRadius>(
            *this, range_handler, n, x, radius, params);
}
|
|
706
|
+
|
|
707
|
+
void IndexFlatPanorama::reset() {
|
|
708
|
+
IndexFlat::reset();
|
|
709
|
+
cum_sums.clear();
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
/**
 * Reconstruct one stored vector.
 *
 * The work is delegated to Panorama::reconstruct, which reads from the
 * index's code buffer.
 *
 * @param key     id of the vector to reconstruct
 * @param recons  output buffer for the reconstructed vector
 */
void IndexFlatPanorama::reconstruct(idx_t key, float* recons) const {
    const auto* stored_codes = codes.data();
    pano.reconstruct(key, recons, stored_codes);
}
|
|
715
|
+
|
|
716
|
+
/**
 * Reconstruct vectors i .. i + n - 1.
 *
 * Explicitly calls the Index base-class implementation rather than any
 * intermediate override.
 * NOTE(review): presumably so that each vector goes through
 * IndexFlatPanorama::reconstruct -- confirm against Index::reconstruct_n.
 *
 * @param i       id of the first vector
 * @param n       number of vectors
 * @param recons  output buffer
 */
void IndexFlatPanorama::reconstruct_n(idx_t i, idx_t n, float* recons) const {
    this->Index::reconstruct_n(i, n, recons);
}
|
|
719
|
+
|
|
720
|
+
size_t IndexFlatPanorama::remove_ids(const IDSelector& sel) {
|
|
721
|
+
idx_t j = 0;
|
|
722
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
|
723
|
+
if (sel.is_member(i)) {
|
|
724
|
+
// should be removed
|
|
725
|
+
} else {
|
|
726
|
+
if (i > j) {
|
|
727
|
+
pano.copy_entry(
|
|
728
|
+
codes.data(),
|
|
729
|
+
codes.data(),
|
|
730
|
+
cum_sums.data(),
|
|
731
|
+
cum_sums.data(),
|
|
732
|
+
j,
|
|
733
|
+
i);
|
|
734
|
+
}
|
|
735
|
+
j++;
|
|
736
|
+
}
|
|
737
|
+
}
|
|
738
|
+
size_t nremove = ntotal - j;
|
|
739
|
+
if (nremove > 0) {
|
|
740
|
+
ntotal = j;
|
|
741
|
+
size_t num_batches = (ntotal + batch_size - 1) / batch_size;
|
|
742
|
+
codes.resize(num_batches * batch_size * code_size);
|
|
743
|
+
cum_sums.resize(num_batches * batch_size * (n_levels + 1));
|
|
744
|
+
}
|
|
745
|
+
return nremove;
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
/**
 * Move all vectors of `otherIndex` into this index.
 *
 * The other index's vectors are reconstructed into a temporary float buffer,
 * appended with add(), and the other index is then reset.
 *
 * @param otherIndex  source index; must pass check_compatible_for_merge and
 *                    is emptied on success
 * @param add_id      must be 0 (ids cannot be shifted in this index type)
 */
void IndexFlatPanorama::merge_from(Index& otherIndex, idx_t add_id) {
    FAISS_THROW_IF_NOT_MSG(add_id == 0, "cannot set ids in FlatPanorama index");
    check_compatible_for_merge(otherIndex);
    IndexFlatPanorama* other = static_cast<IndexFlatPanorama*>(&otherIndex);

    // reconstruct_n() writes d floats per vector, so the buffer is sized in
    // floats. Sizing it by code_size (a byte count) over-allocated it.
    // NOTE(review): relies on code_size == d * sizeof(float) for flat
    // indexes -- confirm against the IndexFlat constructor.
    const size_t n_other = static_cast<size_t>(other->ntotal);
    std::vector<float> buffer(n_other * static_cast<size_t>(d));
    otherIndex.reconstruct_n(0, other->ntotal, buffer.data());

    add(other->ntotal, buffer.data());
    other->reset();
}
|
|
759
|
+
|
|
760
|
+
void IndexFlatPanorama::add_sa_codes(
|
|
761
|
+
idx_t /* n */,
|
|
762
|
+
const uint8_t* /* codes_in */,
|
|
763
|
+
const idx_t* /* xids */) {
|
|
764
|
+
FAISS_THROW_MSG("add_sa_codes not implemented for IndexFlatPanorama");
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
void IndexFlatPanorama::permute_entries(const idx_t* perm) {
|
|
768
|
+
MaybeOwnedVector<uint8_t> new_codes(codes.size());
|
|
769
|
+
std::vector<float> new_cum_sums(cum_sums.size());
|
|
770
|
+
|
|
771
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
|
772
|
+
pano.copy_entry(
|
|
773
|
+
new_codes.data(),
|
|
774
|
+
codes.data(),
|
|
775
|
+
new_cum_sums.data(),
|
|
776
|
+
cum_sums.data(),
|
|
777
|
+
i,
|
|
778
|
+
perm[i]);
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
std::swap(codes, new_codes);
|
|
782
|
+
std::swap(cum_sums, new_cum_sums);
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
/**
 * Re-rank, for every query, a given list of candidate ids and keep the k
 * best ones. Only supported when batch_size == 1.
 *
 * @param n            number of query vectors
 * @param x            query vectors, read as n rows of d floats
 * @param k_base       number of candidate ids per query
 * @param base_labels  candidate ids, read as n rows of k_base entries;
 *                     negative ids are skipped
 * @param k            number of results per query, must be > 0
 * @param distances    output distances
 * @param labels       output labels
 */
void IndexFlatPanorama::search_subset(
        idx_t n,
        const float* x,
        idx_t k_base,
        const idx_t* base_labels,
        idx_t k,
        float* distances,
        idx_t* labels) const {
    using SingleResultHandler =
            HeapBlockResultHandler<CMax<float, int64_t>, false>::
                    SingleResultHandler;
    HeapBlockResultHandler<CMax<float, int64_t>, false> handler(
            size_t(n), distances, labels, size_t(k), nullptr);

    FAISS_THROW_IF_NOT(k > 0);
    FAISS_THROW_IF_NOT(batch_size == 1);

    // OpenMP requires num_threads() to be a positive integer. Clamp in idx_t
    // so that n == 0 yields 1 (not 0) and a very large n cannot be truncated
    // to a negative int.
    [[maybe_unused]] const int nt = static_cast<int>(std::max<idx_t>(
            1, std::min<idx_t>(n, omp_get_max_threads())));

#pragma omp parallel num_threads(nt)
    {
        SingleResultHandler res(handler);

        std::vector<float> query_cum_norms(n_levels + 1);

        // Panorama's optimized point-wise refinement (Algorithm 2):
        // Batch-wise Panorama, as implemented in Panorama.h, incurs overhead
        // from maintaining active_indices and exact_distances. This optimized
        // implementation has minimal overhead and is thus preferred for
        // IndexRefine's use case.
        // 1. Initialize exact distance as ||y||^2 + ||x||^2.
        // 2. For each level, refine distance incrementally:
        //    - Compute dot product for current level: exact_dist -= 2*<x,y>.
        //    - Use Cauchy-Schwarz bound on remaining levels to get lower bound.
        //    - If there are fewer than k points in the heap, add the point to
        //      the heap.
        //    - Else, prune if lower bound exceeds k-th best distance.
        // 3. After all levels, update heap if the point survived.
#pragma omp for
        for (idx_t i = 0; i < n; i++) {
            const idx_t* __restrict idsi = base_labels + i * k_base;
            const float* xi = x + i * d;

            PanoramaStats local_stats;
            local_stats.reset();

            pano.compute_query_cum_sums(xi, query_cum_norms.data());
            float query_cum_norm = query_cum_norms[0] * query_cum_norms[0];

            res.begin(i);

            // Signed index: avoids comparing an unsigned counter with the
            // signed k_base.
            for (idx_t j = 0; j < k_base; j++) {
                idx_t idx = idsi[j];

                if (idx < 0) {
                    continue;
                }

                // With batch_size == 1 every entry owns (n_levels + 1)
                // consecutive cumulative sums and d consecutive floats.
                size_t cum_sum_offset = (n_levels + 1) * idx;
                float full_cum_sum = cum_sums[cum_sum_offset];
                float exact_distance =
                        full_cum_sum * full_cum_sum + query_cum_norm;
                cum_sum_offset++;

                const float* x_ptr = xi;
                const float* p_ptr =
                        reinterpret_cast<const float*>(codes.data()) + d * idx;

                local_stats.total_dims += d;

                bool pruned = false;
                for (size_t level = 0; level < n_levels; level++) {
                    local_stats.total_dims_scanned += pano.level_width_floats;

                    // Refine distance
                    size_t actual_level_width = std::min(
                            pano.level_width_floats,
                            d - level * pano.level_width_floats);
                    float dot_product = fvec_inner_product(
                            x_ptr, p_ptr, actual_level_width);
                    exact_distance -= 2 * dot_product;

                    float remaining_cum_sum = cum_sums[cum_sum_offset];
                    float cauchy_schwarz_bound = 2.0f * remaining_cum_sum *
                            query_cum_norms[level + 1];
                    float lower_bound = exact_distance - cauchy_schwarz_bound;

                    // Prune using Cauchy-Schwarz bound
                    if (lower_bound > res.heap_dis[0]) {
                        pruned = true;
                        break;
                    }

                    cum_sum_offset++;
                    x_ptr += pano.level_width_floats;
                    p_ptr += pano.level_width_floats;
                }

                if (!pruned) {
                    res.add_result(exact_distance, idx);
                }
            }

            res.end();
            // NOTE(review): called from several threads at once; confirm that
            // indexPanorama_stats.add() is safe for concurrent use.
            indexPanorama_stats.add(local_stats);
        }
    }
}
|
|
522
892
|
} // namespace faiss
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
#include <vector>
|
|
14
14
|
|
|
15
15
|
#include <faiss/IndexFlatCodes.h>
|
|
16
|
+
#include <faiss/impl/Panorama.h>
|
|
16
17
|
|
|
17
18
|
namespace faiss {
|
|
18
19
|
|
|
@@ -99,6 +100,85 @@ struct IndexFlatL2 : IndexFlat {
|
|
|
99
100
|
void clear_l2norms();
|
|
100
101
|
};
|
|
101
102
|
|
|
103
|
+
struct IndexFlatPanorama : IndexFlat {
|
|
104
|
+
const size_t batch_size;
|
|
105
|
+
const size_t n_levels;
|
|
106
|
+
std::vector<float> cum_sums;
|
|
107
|
+
Panorama pano;
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* @param d dimensionality of the input vectors
|
|
111
|
+
* @param metric metric type
|
|
112
|
+
* @param n_levels number of Panorama levels
|
|
113
|
+
* @param batch_size batch size for Panorama storage
|
|
114
|
+
*/
|
|
115
|
+
explicit IndexFlatPanorama(
|
|
116
|
+
idx_t d,
|
|
117
|
+
MetricType metric,
|
|
118
|
+
size_t n_levels,
|
|
119
|
+
size_t batch_size)
|
|
120
|
+
: IndexFlat(d, metric),
|
|
121
|
+
batch_size(batch_size),
|
|
122
|
+
n_levels(n_levels),
|
|
123
|
+
pano(code_size, n_levels, batch_size) {
|
|
124
|
+
FAISS_THROW_IF_NOT(metric == METRIC_L2);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
void add(idx_t n, const float* x) override;
|
|
128
|
+
|
|
129
|
+
void search(
|
|
130
|
+
idx_t n,
|
|
131
|
+
const float* x,
|
|
132
|
+
idx_t k,
|
|
133
|
+
float* distances,
|
|
134
|
+
idx_t* labels,
|
|
135
|
+
const SearchParameters* params = nullptr) const override;
|
|
136
|
+
|
|
137
|
+
void range_search(
|
|
138
|
+
idx_t n,
|
|
139
|
+
const float* x,
|
|
140
|
+
float radius,
|
|
141
|
+
RangeSearchResult* result,
|
|
142
|
+
const SearchParameters* params = nullptr) const override;
|
|
143
|
+
|
|
144
|
+
void search_subset(
|
|
145
|
+
idx_t n,
|
|
146
|
+
const float* x,
|
|
147
|
+
idx_t k_base,
|
|
148
|
+
const idx_t* base_labels,
|
|
149
|
+
idx_t k,
|
|
150
|
+
float* distances,
|
|
151
|
+
idx_t* labels) const override;
|
|
152
|
+
|
|
153
|
+
void reset() override;
|
|
154
|
+
|
|
155
|
+
void reconstruct(idx_t key, float* recons) const override;
|
|
156
|
+
|
|
157
|
+
void reconstruct_n(idx_t i, idx_t n, float* recons) const override;
|
|
158
|
+
|
|
159
|
+
size_t remove_ids(const IDSelector& sel) override;
|
|
160
|
+
|
|
161
|
+
void merge_from(Index& otherIndex, idx_t add_id) override;
|
|
162
|
+
|
|
163
|
+
void add_sa_codes(idx_t n, const uint8_t* codes_in, const idx_t* xids)
|
|
164
|
+
override;
|
|
165
|
+
|
|
166
|
+
void permute_entries(const idx_t* perm);
|
|
167
|
+
};
|
|
168
|
+
|
|
169
|
+
struct IndexFlatL2Panorama : IndexFlatPanorama {
|
|
170
|
+
/**
|
|
171
|
+
* @param d dimensionality of the input vectors
|
|
172
|
+
* @param n_levels number of Panorama levels
|
|
173
|
+
* @param batch_size batch size for Panorama storage
|
|
174
|
+
*/
|
|
175
|
+
explicit IndexFlatL2Panorama(
|
|
176
|
+
idx_t d,
|
|
177
|
+
size_t n_levels,
|
|
178
|
+
size_t batch_size = 512)
|
|
179
|
+
: IndexFlatPanorama(d, METRIC_L2, n_levels, batch_size) {}
|
|
180
|
+
};
|
|
181
|
+
|
|
102
182
|
/// optimized version for 1D "vectors".
|
|
103
183
|
struct IndexFlat1D : IndexFlatL2 {
|
|
104
184
|
bool continuous_update = true; ///< is the permutation updated continuously?
|
|
@@ -276,7 +276,7 @@ void hnsw_search(
|
|
|
276
276
|
res.begin(i);
|
|
277
277
|
dis->set_query(x + i * index->d);
|
|
278
278
|
|
|
279
|
-
HNSWStats stats = hnsw.search(*dis, res, vt, params);
|
|
279
|
+
HNSWStats stats = hnsw.search(*dis, index, res, vt, params);
|
|
280
280
|
n1 += stats.n1;
|
|
281
281
|
n2 += stats.n2;
|
|
282
282
|
ndis += stats.ndis;
|
|
@@ -649,6 +649,95 @@ IndexHNSWFlat::IndexHNSWFlat(int d, int M, MetricType metric)
|
|
|
649
649
|
is_trained = true;
|
|
650
650
|
}
|
|
651
651
|
|
|
652
|
+
/**************************************************************
|
|
653
|
+
* IndexHNSWFlatPanorama implementation
|
|
654
|
+
**************************************************************/
|
|
655
|
+
|
|
656
|
+
/**
 * Fill dst_cum_sums[level] with the Euclidean norm of the components of x
 * from the start of `level` to the end of the vector;
 * dst_cum_sums[num_panorama_levels] is set to 0.
 *
 * @param x                     input vector of d floats
 * @param dst_cum_sums          output, num_panorama_levels + 1 floats
 * @param d                     dimensionality of x
 * @param num_panorama_levels   number of levels
 * @param panorama_level_width  components per level (the last level is cut
 *                              at d)
 */
void IndexHNSWFlatPanorama::compute_cum_sums(
        const float* x,
        float* dst_cum_sums,
        int d,
        int num_panorama_levels,
        int panorama_level_width) {
    // Walk the levels from last to first with one running sum of squares, so
    // that no per-vector temporary array is needed.
    dst_cum_sums[num_panorama_levels] = 0.0f;

    float tail_sq_norm = 0.0f;
    for (int level = num_panorama_levels; level-- > 0;) {
        const int level_begin = level * panorama_level_width;
        const int level_end = std::min(level_begin + panorama_level_width, d);

        int component = level_begin;
        while (component < level_end) {
            tail_sq_norm += x[component] * x[component];
            ++component;
        }

        dst_cum_sums[level] = std::sqrt(tail_sq_norm);
    }
}
|
|
677
|
+
|
|
678
|
+
/// Default constructor: default-constructed base, no cumulative sums, and
/// both Panorama parameters set to 0.
IndexHNSWFlatPanorama::IndexHNSWFlatPanorama()
        : IndexHNSWFlat(),
          cum_sums(),
          panorama_level_width(0),
          num_panorama_levels(0) {
    // all state is set by the member initializers above
}
|
|
683
|
+
|
|
684
|
+
/**
 * Build an empty Panorama HNSW index.
 *
 * The level width is ceil(d / num_panorama_levels).
 *
 * @param d                    dimensionality of the vectors
 * @param M                    forwarded to the IndexHNSWFlat constructor
 * @param num_panorama_levels  number of Panorama levels, must be > 0
 * @param metric               must be METRIC_L2
 */
IndexHNSWFlatPanorama::IndexHNSWFlatPanorama(
        int d,
        int M,
        int num_panorama_levels,
        MetricType metric)
        : IndexHNSWFlat(d, M, metric),
          cum_sums(),
          // Guard the division: the member initializers run before the
          // constructor body, so a level count of 0 would divide by zero
          // before the check below could reject it.
          panorama_level_width(
                  num_panorama_levels > 0
                          ? (d + num_panorama_levels - 1) / num_panorama_levels
                          : 0),
          num_panorama_levels(num_panorama_levels) {
    // For now, we only support L2 distance.
    // Supporting dot product and cosine distance is a trivial addition
    // left for future work.
    FAISS_THROW_IF_NOT(metric == METRIC_L2);

    // Here the name refers to the (signed) constructor parameter, not to
    // the member it shadows.
    FAISS_THROW_IF_NOT(num_panorama_levels > 0);

    // Enable Panorama search mode.
    // This is not ideal, but it is still simpler than making a subclass of
    // HNSW and overriding the search logic.
    hnsw.is_panorama = true;
}
|
|
704
|
+
|
|
705
|
+
/**
 * Add n vectors: first compute and store their cumulative sums, then let
 * IndexHNSWFlat::add insert the vectors.
 *
 * @param n  number of vectors
 * @param x  input vectors, read as n rows of d floats
 */
void IndexHNSWFlatPanorama::add(idx_t n, const float* x) {
    const idx_t first_new_id = ntotal;

    cum_sums.resize((ntotal + n) * (num_panorama_levels + 1));

    const size_t n_new = static_cast<size_t>(n);
    size_t row = 0;
    while (row < n_new) {
        float* dst = &cum_sums[(first_new_id + row) * (num_panorama_levels + 1)];
        compute_cum_sums(
                x + row * d,
                dst,
                d,
                num_panorama_levels,
                panorama_level_width);
        ++row;
    }

    // The base class add comes last, after the sums are stored.
    IndexHNSWFlat::add(n, x);
}
|
|
721
|
+
|
|
722
|
+
void IndexHNSWFlatPanorama::reset() {
|
|
723
|
+
cum_sums.clear();
|
|
724
|
+
IndexHNSWFlat::reset();
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
void IndexHNSWFlatPanorama::permute_entries(const idx_t* perm) {
|
|
728
|
+
std::vector<float> new_cum_sums(ntotal * (num_panorama_levels + 1));
|
|
729
|
+
|
|
730
|
+
for (idx_t i = 0; i < ntotal; i++) {
|
|
731
|
+
idx_t src = perm[i];
|
|
732
|
+
memcpy(&new_cum_sums[i * (num_panorama_levels + 1)],
|
|
733
|
+
&cum_sums[src * (num_panorama_levels + 1)],
|
|
734
|
+
(num_panorama_levels + 1) * sizeof(float));
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
std::swap(cum_sums, new_cum_sums);
|
|
738
|
+
IndexHNSWFlat::permute_entries(perm);
|
|
739
|
+
}
|
|
740
|
+
|
|
652
741
|
/**************************************************************
|
|
653
742
|
* IndexHNSWPQ implementation
|
|
654
743
|
**************************************************************/
|
|
@@ -111,7 +111,7 @@ struct IndexHNSW : Index {
|
|
|
111
111
|
|
|
112
112
|
void link_singletons();
|
|
113
113
|
|
|
114
|
-
void permute_entries(const idx_t* perm);
|
|
114
|
+
virtual void permute_entries(const idx_t* perm);
|
|
115
115
|
|
|
116
116
|
DistanceComputer* get_distance_computer() const override;
|
|
117
117
|
};
|
|
@@ -125,6 +125,62 @@ struct IndexHNSWFlat : IndexHNSW {
|
|
|
125
125
|
IndexHNSWFlat(int d, int M, MetricType metric = METRIC_L2);
|
|
126
126
|
};
|
|
127
127
|
|
|
128
|
+
/** Panorama implementation of IndexHNSWFlat following
|
|
129
|
+
* https://www.arxiv.org/pdf/2510.00566.
|
|
130
|
+
*
|
|
131
|
+
* Unlike cluster-based Panorama, the vectors have to be higher dimensional
|
|
132
|
+
* (i.e. typically d > 512) and/or be able to compress a lot of their energy in
|
|
133
|
+
* the early dimensions to be effective. This is because HNSW accesses vectors
|
|
134
|
+
* in a random order, which makes cache misses dominate the distance computation
|
|
135
|
+
* time.
|
|
136
|
+
*
|
|
137
|
+
* The `num_panorama_levels` parameter controls the granularity of progressive
|
|
138
|
+
* distance refinement, allowing candidates to be eliminated early using partial
|
|
139
|
+
* distance computations rather than computing full distances.
|
|
140
|
+
*
|
|
141
|
+
* NOTE: This version of HNSW handles search slightly differently than the
|
|
142
|
+
* vanilla HNSW, as it uses partial distance computations with progressive
|
|
143
|
+
* refinement bounds. Instead of computing full distances immediately for all
|
|
144
|
+
* candidates, Panorama maintains lower and upper bounds that are incrementally
|
|
145
|
+
* tightened across refinement levels. Candidates are inserted into the search
|
|
146
|
+
* beam using approximate distance estimates (LB+UB)/2 and are only fully
|
|
147
|
+
* evaluated when they survive pruning and enter the result heap. This allows
|
|
148
|
+
* the algorithm to prune unpromising candidates early using Cauchy-Schwarz
|
|
149
|
+
* bounds on partial inner products. Hence, recall is not guaranteed to be the
|
|
150
|
+
* same as vanilla HNSW due to the heterogeneous precision within the search
|
|
151
|
+
* beam (exact vs. partial distance estimates affecting traversal order).
|
|
152
|
+
*/
|
|
153
|
+
struct IndexHNSWFlatPanorama : IndexHNSWFlat {
|
|
154
|
+
IndexHNSWFlatPanorama();
|
|
155
|
+
IndexHNSWFlatPanorama(
|
|
156
|
+
int d,
|
|
157
|
+
int M,
|
|
158
|
+
int num_panorama_levels,
|
|
159
|
+
MetricType metric = METRIC_L2);
|
|
160
|
+
|
|
161
|
+
void add(idx_t n, const float* x) override;
|
|
162
|
+
void reset() override;
|
|
163
|
+
void permute_entries(const idx_t* perm) override;
|
|
164
|
+
|
|
165
|
+
/// Inline for performance - called frequently in search hot path.
|
|
166
|
+
const float* get_cum_sum(idx_t i) const {
|
|
167
|
+
return cum_sums.data() + i * (num_panorama_levels + 1);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/// Compute cumulative sums for a vector (used both for database points and
|
|
171
|
+
/// queries).
|
|
172
|
+
static void compute_cum_sums(
|
|
173
|
+
const float* x,
|
|
174
|
+
float* dst_cum_sums,
|
|
175
|
+
int d,
|
|
176
|
+
int num_panorama_levels,
|
|
177
|
+
int panorama_level_width);
|
|
178
|
+
|
|
179
|
+
std::vector<float> cum_sums;
|
|
180
|
+
const size_t panorama_level_width;
|
|
181
|
+
const size_t num_panorama_levels;
|
|
182
|
+
};
|
|
183
|
+
|
|
128
184
|
/** PQ index topped with an HNSW structure to access elements
|
|
129
185
|
* more efficiently.
|
|
130
186
|
*/
|