faiss 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +2 -0
  4. data/ext/faiss/index.cpp +8 -0
  5. data/lib/faiss/version.rb +1 -1
  6. data/vendor/faiss/faiss/IVFlib.cpp +25 -49
  7. data/vendor/faiss/faiss/Index.cpp +11 -0
  8. data/vendor/faiss/faiss/Index.h +24 -1
  9. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
  10. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  11. data/vendor/faiss/faiss/IndexFastScan.cpp +1 -1
  12. data/vendor/faiss/faiss/IndexFastScan.h +3 -8
  13. data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
  14. data/vendor/faiss/faiss/IndexFlat.h +80 -0
  15. data/vendor/faiss/faiss/IndexHNSW.cpp +90 -1
  16. data/vendor/faiss/faiss/IndexHNSW.h +57 -1
  17. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +34 -149
  18. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +86 -2
  19. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +3 -1
  20. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +293 -115
  21. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +52 -16
  22. data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
  23. data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
  24. data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
  25. data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -16
  26. data/vendor/faiss/faiss/IndexRaBitQ.h +5 -1
  27. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +238 -93
  28. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +35 -9
  29. data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
  30. data/vendor/faiss/faiss/IndexRefine.h +17 -0
  31. data/vendor/faiss/faiss/clone_index.cpp +2 -0
  32. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
  33. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +1 -1
  34. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
  35. data/vendor/faiss/faiss/impl/DistanceComputer.h +74 -3
  36. data/vendor/faiss/faiss/impl/HNSW.cpp +294 -15
  37. data/vendor/faiss/faiss/impl/HNSW.h +31 -2
  38. data/vendor/faiss/faiss/impl/IDSelector.h +3 -3
  39. data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
  40. data/vendor/faiss/faiss/impl/Panorama.h +204 -0
  41. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
  42. data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
  43. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +54 -6
  44. data/vendor/faiss/faiss/impl/RaBitQUtils.h +183 -6
  45. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +269 -84
  46. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +71 -4
  47. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
  48. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +6 -9
  50. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -3
  51. data/vendor/faiss/faiss/impl/index_read.cpp +156 -12
  52. data/vendor/faiss/faiss/impl/index_write.cpp +142 -19
  53. data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
  54. data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
  55. data/vendor/faiss/faiss/impl/svs_io.h +67 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +182 -15
  57. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
  58. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  59. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +18 -109
  60. data/vendor/faiss/faiss/invlists/InvertedLists.h +2 -18
  61. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
  62. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  63. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
  64. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
  65. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
  66. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
  67. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
  68. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
  69. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
  70. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
  71. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
  72. data/vendor/faiss/faiss/utils/distances.cpp +0 -3
  73. data/vendor/faiss/faiss/utils/utils.cpp +4 -0
  74. metadata +18 -1
@@ -129,6 +129,7 @@ IndexIDMap* clone_IndexIDMap(const IndexIDMap* im) {
129
129
 
130
130
  IndexHNSW* clone_IndexHNSW(const IndexHNSW* ihnsw) {
131
131
  TRYCLONE(IndexHNSW2Level, ihnsw)
132
+ TRYCLONE(IndexHNSWFlatPanorama, ihnsw)
132
133
  TRYCLONE(IndexHNSWFlat, ihnsw)
133
134
  TRYCLONE(IndexHNSWPQ, ihnsw)
134
135
  TRYCLONE(IndexHNSWSQ, ihnsw)
@@ -276,6 +277,7 @@ Index* Cloner::clone_Index(const Index* index) {
276
277
  // IndexFlat
277
278
  TRYCLONE(IndexFlat1D, index)
278
279
  TRYCLONE(IndexFlatL2, index)
280
+ TRYCLONE(IndexFlatL2Panorama, index)
279
281
  TRYCLONE(IndexFlatIP, index)
280
282
  TRYCLONE(IndexFlat, index)
281
283
 
@@ -21,8 +21,10 @@ struct GpuClonerOptions {
21
21
  /// is the coarse quantizer in float16?
22
22
  bool useFloat16CoarseQuantizer = false;
23
23
 
24
- /// for GpuIndexIVFFlat, is storage in float16?
25
24
  /// for GpuIndexIVFPQ, are intermediate calculations in float16?
25
+ /// Note: for float16 storage, use GpuIndexIVFScalarQuantizer
26
+ /// or cuVS, not GpuIndexIVFFlat. useFloat16 will not affect
27
+ /// GpuIndexIVFFlat storage.
26
28
  bool useFloat16 = false;
27
29
 
28
30
  /// use precomputed tables?
@@ -99,7 +99,7 @@ struct IVFPQBuildCagraConfig {
99
99
  /// Note: if `dim` is not multiple of `pq_dim`, a random rotation is always
100
100
  /// applied to the input data and queries to transform the working space
101
101
  /// from `dim` to `rot_dim`, which may be slightly larger than the original
102
- /// space and and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
102
+ /// space and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
103
103
  /// However, this transform is not necessary when `dim` is multiple of
104
104
  /// `pq_dim`
105
105
  /// (`dim == rot_dim`, hence no need in adding "extra" data columns /
@@ -700,7 +700,7 @@ StandardGpuResourcesImpl::getMemoryInfo() const {
700
700
  //
701
701
 
702
702
  StandardGpuResources::StandardGpuResources()
703
- : res_(new StandardGpuResourcesImpl) {}
703
+ : res_(std::make_shared<StandardGpuResourcesImpl>()) {}
704
704
 
705
705
  StandardGpuResources::~StandardGpuResources() = default;
706
706
 
@@ -113,18 +113,89 @@ struct FlatCodesDistanceComputer : DistanceComputer {
113
113
  const uint8_t* codes;
114
114
  size_t code_size;
115
115
 
116
- FlatCodesDistanceComputer(const uint8_t* codes, size_t code_size)
117
- : codes(codes), code_size(code_size) {}
116
+ const float* q = nullptr; // not used in all distance computers
118
117
 
119
- FlatCodesDistanceComputer() : codes(nullptr), code_size(0) {}
118
+ FlatCodesDistanceComputer(
119
+ const uint8_t* codes,
120
+ size_t code_size,
121
+ const float* q = nullptr)
122
+ : codes(codes), code_size(code_size), q(q) {}
123
+
124
+ explicit FlatCodesDistanceComputer(const float* q)
125
+ : codes(nullptr), code_size(0), q(q) {}
126
+
127
+ FlatCodesDistanceComputer() : codes(nullptr), code_size(0), q(nullptr) {}
120
128
 
121
129
  float operator()(idx_t i) override {
122
130
  return distance_to_code(codes + i * code_size);
123
131
  }
124
132
 
133
+ /// Computes a partial dot product over a slice of the query vector.
134
+ /// The slice is defined by the following parameters:
135
+ /// — `offset`: the starting index of the first component to include
136
+ /// — `num_components`: the number of consecutive components to include
137
+ ///
138
+ /// Components refer to raw dimensions of the flat (uncompressed) query
139
+ /// vector.
140
+ ///
141
+ /// By default, this method throws an error, as it is only implemented
142
+ /// in specific subclasses such as `FlatL2Dis`. Other flat distance
143
+ /// computers may override this when partial dot product support is needed.
144
+ ///
145
+ /// Over time, this method might be changed to a pure virtual function (`=
146
+ /// 0`) to enforce implementation in subclasses that require this
147
+ /// functionality.
148
+ ///
149
+ /// This method is not part of the generic `DistanceComputer` interface
150
+ /// because for compressed representations (e.g., product quantization),
151
+ /// calling `partial_dot_product` repeatedly is often less efficient than
152
+ /// computing the full distance at once.
153
+ ///
154
+ /// Supporting efficient partial scans generally requires a different memory
155
+ /// layout, such as interleaved blocks that keep SIMD lanes full. This is a
156
+ /// non-trivial change and not supported in the current flat layout.
157
+ ///
158
+ /// For more details on partial (or chunked) dot product computations and
159
+ /// the performance trade-offs involved, refer to the Panorama paper:
160
+ /// https://arxiv.org/pdf/2510.00566
161
+ virtual float partial_dot_product(
162
+ const idx_t /* i */,
163
+ const uint32_t /* offset */,
164
+ const uint32_t /* num_components */) {
165
+ FAISS_THROW_MSG("partial_dot_product not implemented");
166
+ }
167
+
125
168
  /// compute distance of current query to an encoded vector
126
169
  virtual float distance_to_code(const uint8_t* code) = 0;
127
170
 
171
+ /// Compute partial dot products of current query to 4 stored vectors.
172
+ /// See `partial_dot_product` for more details.
173
+ virtual void partial_dot_product_batch_4(
174
+ const idx_t idx0,
175
+ const idx_t idx1,
176
+ const idx_t idx2,
177
+ const idx_t idx3,
178
+ float& dp0,
179
+ float& dp1,
180
+ float& dp2,
181
+ float& dp3,
182
+ const uint32_t offset,
183
+ const uint32_t num_components) {
184
+ // default implementation for correctness
185
+ const float d0 =
186
+ this->partial_dot_product(idx0, offset, num_components);
187
+ const float d1 =
188
+ this->partial_dot_product(idx1, offset, num_components);
189
+ const float d2 =
190
+ this->partial_dot_product(idx2, offset, num_components);
191
+ const float d3 =
192
+ this->partial_dot_product(idx3, offset, num_components);
193
+ dp0 = d0;
194
+ dp1 = d1;
195
+ dp2 = d2;
196
+ dp3 = d3;
197
+ }
198
+
128
199
  virtual ~FlatCodesDistanceComputer() override {}
129
200
  };
130
201
 
@@ -9,6 +9,8 @@
9
9
 
10
10
  #include <cstddef>
11
11
 
12
+ #include <faiss/IndexHNSW.h>
13
+
12
14
  #include <faiss/impl/AuxIndexStructures.h>
13
15
  #include <faiss/impl/DistanceComputer.h>
14
16
  #include <faiss/impl/IDSelector.h>
@@ -588,6 +590,28 @@ void HNSW::add_with_locks(
588
590
  using MinimaxHeap = HNSW::MinimaxHeap;
589
591
  using Node = HNSW::Node;
590
592
  using C = HNSW::C;
593
+
594
+ /** Helper to extract search parameters from HNSW and SearchParameters */
595
+ static inline void extract_search_params(
596
+ const HNSW& hnsw,
597
+ const SearchParameters* params,
598
+ bool& do_dis_check,
599
+ int& efSearch,
600
+ const IDSelector*& sel) {
601
+ // can be overridden by search params
602
+ do_dis_check = hnsw.check_relative_distance;
603
+ efSearch = hnsw.efSearch;
604
+ sel = nullptr;
605
+ if (params) {
606
+ if (const SearchParametersHNSW* hnsw_params =
607
+ dynamic_cast<const SearchParametersHNSW*>(params)) {
608
+ do_dis_check = hnsw_params->check_relative_distance;
609
+ efSearch = hnsw_params->efSearch;
610
+ }
611
+ sel = params->sel;
612
+ }
613
+ }
614
+
591
615
  /** Do a BFS on the candidates list */
592
616
  int search_from_candidates(
593
617
  const HNSW& hnsw,
@@ -602,18 +626,10 @@ int search_from_candidates(
602
626
  int nres = nres_in;
603
627
  int ndis = 0;
604
628
 
605
- // can be overridden by search params
606
- bool do_dis_check = hnsw.check_relative_distance;
607
- int efSearch = hnsw.efSearch;
608
- const IDSelector* sel = nullptr;
609
- if (params) {
610
- if (const SearchParametersHNSW* hnsw_params =
611
- dynamic_cast<const SearchParametersHNSW*>(params)) {
612
- do_dis_check = hnsw_params->check_relative_distance;
613
- efSearch = hnsw_params->efSearch;
614
- }
615
- sel = params->sel;
616
- }
629
+ bool do_dis_check;
630
+ int efSearch;
631
+ const IDSelector* sel;
632
+ extract_search_params(hnsw, params, do_dis_check, efSearch, sel);
617
633
 
618
634
  C::T threshold = res.threshold;
619
635
  for (int i = 0; i < candidates.size(); i++) {
@@ -735,6 +751,253 @@ int search_from_candidates(
735
751
  return nres;
736
752
  }
737
753
 
754
+ int search_from_candidates_panorama(
755
+ const HNSW& hnsw,
756
+ const IndexHNSW* index,
757
+ DistanceComputer& qdis,
758
+ ResultHandler<C>& res,
759
+ MinimaxHeap& candidates,
760
+ VisitedTable& vt,
761
+ HNSWStats& stats,
762
+ int level,
763
+ int nres_in,
764
+ const SearchParameters* params) {
765
+ int nres = nres_in;
766
+ int ndis = 0;
767
+
768
+ bool do_dis_check;
769
+ int efSearch;
770
+ const IDSelector* sel;
771
+ extract_search_params(hnsw, params, do_dis_check, efSearch, sel);
772
+
773
+ C::T threshold = res.threshold;
774
+ for (int i = 0; i < candidates.size(); i++) {
775
+ idx_t v1 = candidates.ids[i];
776
+ float d = candidates.dis[i];
777
+ FAISS_ASSERT(v1 >= 0);
778
+ if (!sel || sel->is_member(v1)) {
779
+ if (d < threshold) {
780
+ if (res.add_result(d, v1)) {
781
+ threshold = res.threshold;
782
+ }
783
+ }
784
+ }
785
+ vt.set(v1);
786
+ }
787
+
788
+ // Validate the index type so we can access cumulative sums, n_levels, and
789
+ // get the ability to compute partial dot products.
790
+ const auto* panorama_index =
791
+ dynamic_cast<const IndexHNSWFlatPanorama*>(index);
792
+ FAISS_THROW_IF_NOT_MSG(
793
+ panorama_index, "Index must be a IndexHNSWFlatPanorama");
794
+ auto* flat_codes_qdis = dynamic_cast<FlatCodesDistanceComputer*>(&qdis);
795
+ FAISS_THROW_IF_NOT_MSG(
796
+ flat_codes_qdis,
797
+ "DistanceComputer must be a FlatCodesDistanceComputer");
798
+
799
+ // Allocate space for the index array and exact distances.
800
+ size_t M = hnsw.nb_neighbors(0);
801
+ std::vector<idx_t> index_array(M);
802
+ std::vector<float> exact_distances(M);
803
+
804
+ const float* query = flat_codes_qdis->q;
805
+ std::vector<float> query_cum_sums(panorama_index->num_panorama_levels + 1);
806
+ IndexHNSWFlatPanorama::compute_cum_sums(
807
+ query,
808
+ query_cum_sums.data(),
809
+ panorama_index->d,
810
+ panorama_index->num_panorama_levels,
811
+ panorama_index->panorama_level_width);
812
+ float query_norm_sq = query_cum_sums[0] * query_cum_sums[0];
813
+
814
+ int nstep = 0;
815
+
816
+ while (candidates.size() > 0) {
817
+ float d0 = 0;
818
+ int v0 = candidates.pop_min(&d0);
819
+
820
+ if (do_dis_check) {
821
+ // tricky stopping condition: there are more than ef
822
+ // distances that are processed already that are smaller
823
+ // than d0
824
+
825
+ int n_dis_below = candidates.count_below(d0);
826
+ if (n_dis_below >= efSearch) {
827
+ break;
828
+ }
829
+ }
830
+
831
+ size_t begin, end;
832
+ hnsw.neighbor_range(v0, level, &begin, &end);
833
+
834
+ // Unlike the vanilla HNSW, we already remove (and compact) the visited
835
+ // nodes from the candidates list at this stage. We also remove nodes
836
+ // that are not selected.
837
+ size_t initial_size = 0;
838
+ for (size_t j = begin; j < end; j++) {
839
+ int v1 = hnsw.neighbors[j];
840
+ if (v1 < 0) {
841
+ break;
842
+ }
843
+
844
+ const float* cum_sums_v1 = panorama_index->get_cum_sum(v1);
845
+ index_array[initial_size] = v1;
846
+ exact_distances[initial_size] =
847
+ query_norm_sq + cum_sums_v1[0] * cum_sums_v1[0];
848
+
849
+ bool is_selected = !sel || sel->is_member(v1);
850
+ initial_size += is_selected && !vt.get(v1) ? 1 : 0;
851
+
852
+ vt.set(v1);
853
+ }
854
+
855
+ size_t batch_size = initial_size;
856
+ size_t curr_panorama_level = 0;
857
+ const size_t num_panorama_levels = panorama_index->num_panorama_levels;
858
+ while (curr_panorama_level < num_panorama_levels && batch_size > 0) {
859
+ float query_cum_norm = query_cum_sums[curr_panorama_level + 1];
860
+
861
+ const size_t panorama_level_width =
862
+ panorama_index->panorama_level_width;
863
+ size_t start_dim = curr_panorama_level * panorama_level_width;
864
+ size_t end_dim = (curr_panorama_level + 1) * panorama_level_width;
865
+ end_dim = std::min(end_dim, static_cast<size_t>(panorama_index->d));
866
+
867
+ size_t i = 0;
868
+ size_t next_batch_size = 0;
869
+ for (; i + 3 < batch_size; i += 4) {
870
+ idx_t idx_0 = index_array[i];
871
+ idx_t idx_1 = index_array[i + 1];
872
+ idx_t idx_2 = index_array[i + 2];
873
+ idx_t idx_3 = index_array[i + 3];
874
+
875
+ float dp[4];
876
+ flat_codes_qdis->partial_dot_product_batch_4(
877
+ idx_0,
878
+ idx_1,
879
+ idx_2,
880
+ idx_3,
881
+ dp[0],
882
+ dp[1],
883
+ dp[2],
884
+ dp[3],
885
+ start_dim,
886
+ end_dim - start_dim);
887
+ ndis += 4;
888
+
889
+ float new_exact_0 = exact_distances[i + 0] - 2 * dp[0];
890
+ float new_exact_1 = exact_distances[i + 1] - 2 * dp[1];
891
+ float new_exact_2 = exact_distances[i + 2] - 2 * dp[2];
892
+ float new_exact_3 = exact_distances[i + 3] - 2 * dp[3];
893
+
894
+ float cum_sum_0 = panorama_index->get_cum_sum(
895
+ idx_0)[curr_panorama_level + 1];
896
+ float cum_sum_1 = panorama_index->get_cum_sum(
897
+ idx_1)[curr_panorama_level + 1];
898
+ float cum_sum_2 = panorama_index->get_cum_sum(
899
+ idx_2)[curr_panorama_level + 1];
900
+ float cum_sum_3 = panorama_index->get_cum_sum(
901
+ idx_3)[curr_panorama_level + 1];
902
+
903
+ float cs_bound_0 = 2.0f * cum_sum_0 * query_cum_norm;
904
+ float cs_bound_1 = 2.0f * cum_sum_1 * query_cum_norm;
905
+ float cs_bound_2 = 2.0f * cum_sum_2 * query_cum_norm;
906
+ float cs_bound_3 = 2.0f * cum_sum_3 * query_cum_norm;
907
+
908
+ float lower_bound_0 = new_exact_0 - cs_bound_0;
909
+ float lower_bound_1 = new_exact_1 - cs_bound_1;
910
+ float lower_bound_2 = new_exact_2 - cs_bound_2;
911
+ float lower_bound_3 = new_exact_3 - cs_bound_3;
912
+
913
+ // The following code is not the most branch friendly (due to
914
+ // the maintenance of the candidate heap), but micro-benchmarks
915
+ // have shown that it is not worth it to write horrible code to
916
+ // squeeze out those cycles.
917
+ if (lower_bound_0 <= threshold) {
918
+ exact_distances[next_batch_size] = new_exact_0;
919
+ index_array[next_batch_size] = idx_0;
920
+ next_batch_size += 1;
921
+ } else {
922
+ candidates.push(idx_0, new_exact_0);
923
+ }
924
+ if (lower_bound_1 <= threshold) {
925
+ exact_distances[next_batch_size] = new_exact_1;
926
+ index_array[next_batch_size] = idx_1;
927
+ next_batch_size += 1;
928
+ } else {
929
+ candidates.push(idx_1, new_exact_1);
930
+ }
931
+ if (lower_bound_2 <= threshold) {
932
+ exact_distances[next_batch_size] = new_exact_2;
933
+ index_array[next_batch_size] = idx_2;
934
+ next_batch_size += 1;
935
+ } else {
936
+ candidates.push(idx_2, new_exact_2);
937
+ }
938
+ if (lower_bound_3 <= threshold) {
939
+ exact_distances[next_batch_size] = new_exact_3;
940
+ index_array[next_batch_size] = idx_3;
941
+ next_batch_size += 1;
942
+ } else {
943
+ candidates.push(idx_3, new_exact_3);
944
+ }
945
+ }
946
+
947
+ // Process the remaining candidates.
948
+ for (; i < batch_size; i++) {
949
+ idx_t idx = index_array[i];
950
+
951
+ float dp = flat_codes_qdis->partial_dot_product(
952
+ idx, start_dim, end_dim - start_dim);
953
+ ndis += 1;
954
+ float new_exact = exact_distances[i] - 2.0f * dp;
955
+
956
+ float cum_sum = panorama_index->get_cum_sum(
957
+ idx)[curr_panorama_level + 1];
958
+ float cs_bound = 2.0f * cum_sum * query_cum_norm;
959
+ float lower_bound = new_exact - cs_bound;
960
+
961
+ if (lower_bound <= threshold) {
962
+ exact_distances[next_batch_size] = new_exact;
963
+ index_array[next_batch_size] = idx;
964
+ next_batch_size += 1;
965
+ } else {
966
+ candidates.push(idx, new_exact);
967
+ }
968
+ }
969
+
970
+ batch_size = next_batch_size;
971
+ curr_panorama_level++;
972
+ }
973
+
974
+ // Add surviving candidates to the result handler.
975
+ for (size_t i = 0; i < batch_size; i++) {
976
+ idx_t idx = index_array[i];
977
+ if (res.add_result(exact_distances[i], idx)) {
978
+ nres += 1;
979
+ }
980
+ candidates.push(idx, exact_distances[i]);
981
+ }
982
+
983
+ nstep++;
984
+ if (!do_dis_check && nstep > efSearch) {
985
+ break;
986
+ }
987
+ }
988
+
989
+ if (level == 0) {
990
+ stats.n1++;
991
+ if (candidates.size() == 0) {
992
+ stats.n2++;
993
+ }
994
+ stats.ndis += ndis;
995
+ stats.nhops += nstep;
996
+ }
997
+
998
+ return nres;
999
+ }
1000
+
738
1001
  std::priority_queue<HNSW::Node> search_from_candidate_unbounded(
739
1002
  const HNSW& hnsw,
740
1003
  const Node& node,
@@ -936,6 +1199,7 @@ int extract_k_from_ResultHandler(ResultHandler<C>& res) {
936
1199
 
937
1200
  HNSWStats HNSW::search(
938
1201
  DistanceComputer& qdis,
1202
+ const IndexHNSW* index,
939
1203
  ResultHandler<C>& res,
940
1204
  VisitedTable& vt,
941
1205
  const SearchParameters* params) const {
@@ -966,13 +1230,28 @@ HNSWStats HNSW::search(
966
1230
  }
967
1231
 
968
1232
  int ef = std::max(efSearch, k);
969
- if (bounded_queue) { // this is the most common branch
1233
+ if (bounded_queue) { // this is the most common branch, for now we only
1234
+ // support Panorama search in this branch
970
1235
  MinimaxHeap candidates(ef);
971
1236
 
972
1237
  candidates.push(nearest, d_nearest);
973
1238
 
974
- search_from_candidates(
975
- *this, qdis, res, candidates, vt, stats, 0, 0, params);
1239
+ if (!is_panorama) {
1240
+ search_from_candidates(
1241
+ *this, qdis, res, candidates, vt, stats, 0, 0, params);
1242
+ } else {
1243
+ search_from_candidates_panorama(
1244
+ *this,
1245
+ index,
1246
+ qdis,
1247
+ res,
1248
+ candidates,
1249
+ vt,
1250
+ stats,
1251
+ 0,
1252
+ 0,
1253
+ params);
1254
+ }
976
1255
  } else {
977
1256
  std::priority_queue<Node> top_candidates =
978
1257
  search_from_candidate_unbounded(
@@ -8,12 +8,12 @@
8
8
  #pragma once
9
9
 
10
10
  #include <queue>
11
- #include <unordered_set>
12
11
  #include <vector>
13
12
 
14
13
  #include <omp.h>
15
14
 
16
15
  #include <faiss/Index.h>
16
+ #include <faiss/impl/DistanceComputer.h>
17
17
  #include <faiss/impl/FaissAssert.h>
18
18
  #include <faiss/impl/maybe_owned_vector.h>
19
19
  #include <faiss/impl/platform_macros.h>
@@ -22,6 +22,10 @@
22
22
 
23
23
  namespace faiss {
24
24
 
25
+ // Forward declarations to avoid circular dependency.
26
+ struct IndexHNSW;
27
+ struct IndexHNSWFlatPanorama;
28
+
25
29
  /** Implementation of the Hierarchical Navigable Small World
26
30
  * datastructure.
27
31
  *
@@ -146,6 +150,9 @@ struct HNSW {
146
150
  /// use bounded queue during exploration
147
151
  bool search_bounded_queue = true;
148
152
 
153
+ /// use Panorama progressive pruning in search
154
+ bool is_panorama = false;
155
+
149
156
  // methods that initialize the tree sizes
150
157
 
151
158
  /// initialize the assign_probas and cum_nneighbor_per_level to
@@ -196,9 +203,15 @@ struct HNSW {
196
203
  VisitedTable& vt,
197
204
  bool keep_max_size_level0 = false);
198
205
 
199
- /// search interface for 1 point, single thread
206
+ /// Search interface for 1 point, single thread
207
+ ///
208
+ /// NOTE: We pass a reference to the index itself to allow for additional
209
+ /// state information to be passed (used for Panorama progressive pruning).
210
+ /// The alternative would be to override both HNSW::search and
211
+ /// HNSWIndex::search, which would be a nuisance of code duplication.
200
212
  HNSWStats search(
201
213
  DistanceComputer& qdis,
214
+ const IndexHNSW* index,
202
215
  ResultHandler<C>& res,
203
216
  VisitedTable& vt,
204
217
  const SearchParameters* params = nullptr) const;
@@ -267,6 +280,22 @@ int search_from_candidates(
267
280
  int nres_in = 0,
268
281
  const SearchParameters* params = nullptr);
269
282
 
283
+ /// Equivalent to `search_from_candidates`, but applies pruning with progressive
284
+ /// refinement bounds.
285
+ /// This is used in `IndexHNSWFlatPanorama` to improve the search performance
286
+ /// for higher dimensional vectors.
287
+ int search_from_candidates_panorama(
288
+ const HNSW& hnsw,
289
+ const IndexHNSW* index,
290
+ DistanceComputer& qdis,
291
+ ResultHandler<HNSW::C>& res,
292
+ HNSW::MinimaxHeap& candidates,
293
+ VisitedTable& vt,
294
+ HNSWStats& stats,
295
+ int level,
296
+ int nres_in = 0,
297
+ const SearchParameters* params = nullptr);
298
+
270
299
  HNSWStats greedy_update_nearest(
271
300
  const HNSW& hnsw,
272
301
  DistanceComputer& qdis,
@@ -131,7 +131,7 @@ struct IDSelectorAll : IDSelector {
131
131
  virtual ~IDSelectorAll() {}
132
132
  };
133
133
 
134
- /// does an AND operation on the the two given IDSelector's is_membership
134
+ /// does an AND operation on the two given IDSelector's is_membership
135
135
  /// results.
136
136
  struct IDSelectorAnd : IDSelector {
137
137
  const IDSelector* lhs;
@@ -144,7 +144,7 @@ struct IDSelectorAnd : IDSelector {
144
144
  virtual ~IDSelectorAnd() {}
145
145
  };
146
146
 
147
- /// does an OR operation on the the two given IDSelector's is_membership
147
+ /// does an OR operation on the two given IDSelector's is_membership
148
148
  /// results.
149
149
  struct IDSelectorOr : IDSelector {
150
150
  const IDSelector* lhs;
@@ -157,7 +157,7 @@ struct IDSelectorOr : IDSelector {
157
157
  virtual ~IDSelectorOr() {}
158
158
  };
159
159
 
160
- /// does an XOR operation on the the two given IDSelector's is_membership
160
+ /// does an XOR operation on the two given IDSelector's is_membership
161
161
  /// results.
162
162
  struct IDSelectorXOr : IDSelector {
163
163
  const IDSelector* lhs;