faiss 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +2 -0
  4. data/ext/faiss/index.cpp +33 -6
  5. data/ext/faiss/index_binary.cpp +17 -4
  6. data/ext/faiss/kmeans.cpp +6 -6
  7. data/lib/faiss/version.rb +1 -1
  8. data/vendor/faiss/faiss/AutoTune.cpp +2 -3
  9. data/vendor/faiss/faiss/AutoTune.h +1 -1
  10. data/vendor/faiss/faiss/Clustering.cpp +2 -2
  11. data/vendor/faiss/faiss/Clustering.h +2 -2
  12. data/vendor/faiss/faiss/IVFlib.cpp +26 -51
  13. data/vendor/faiss/faiss/IVFlib.h +1 -1
  14. data/vendor/faiss/faiss/Index.cpp +11 -0
  15. data/vendor/faiss/faiss/Index.h +34 -11
  16. data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
  17. data/vendor/faiss/faiss/Index2Layer.h +2 -2
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +1 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
  20. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
  21. data/vendor/faiss/faiss/IndexBinary.h +7 -7
  22. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
  23. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +8 -2
  24. data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
  25. data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
  26. data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
  27. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
  28. data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
  29. data/vendor/faiss/faiss/IndexFastScan.h +102 -7
  30. data/vendor/faiss/faiss/IndexFlat.cpp +374 -4
  31. data/vendor/faiss/faiss/IndexFlat.h +81 -1
  32. data/vendor/faiss/faiss/IndexHNSW.cpp +93 -2
  33. data/vendor/faiss/faiss/IndexHNSW.h +58 -2
  34. data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
  35. data/vendor/faiss/faiss/IndexIDMap.h +6 -6
  36. data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
  37. data/vendor/faiss/faiss/IndexIVF.h +5 -5
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
  40. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
  41. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
  42. data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
  43. data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
  44. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +251 -0
  45. data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
  46. data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
  47. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  48. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
  49. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
  50. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +99 -8
  51. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +4 -1
  52. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +828 -0
  53. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +252 -0
  54. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
  55. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
  56. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
  57. data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
  58. data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
  59. data/vendor/faiss/faiss/IndexPQ.cpp +4 -1
  60. data/vendor/faiss/faiss/IndexPQ.h +1 -1
  61. data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
  62. data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
  63. data/vendor/faiss/faiss/IndexPreTransform.cpp +14 -0
  64. data/vendor/faiss/faiss/IndexPreTransform.h +9 -0
  65. data/vendor/faiss/faiss/IndexRaBitQ.cpp +96 -13
  66. data/vendor/faiss/faiss/IndexRaBitQ.h +11 -2
  67. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +731 -0
  68. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +175 -0
  69. data/vendor/faiss/faiss/IndexRefine.cpp +49 -0
  70. data/vendor/faiss/faiss/IndexRefine.h +17 -0
  71. data/vendor/faiss/faiss/IndexShards.cpp +1 -1
  72. data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
  73. data/vendor/faiss/faiss/MetricType.h +1 -1
  74. data/vendor/faiss/faiss/VectorTransform.h +2 -2
  75. data/vendor/faiss/faiss/clone_index.cpp +5 -1
  76. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  77. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +3 -1
  78. data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
  79. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
  80. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
  81. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +11 -7
  82. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +1 -1
  83. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
  84. data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
  85. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
  86. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
  87. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
  88. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
  89. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
  90. data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
  91. data/vendor/faiss/faiss/impl/DistanceComputer.h +77 -6
  92. data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
  93. data/vendor/faiss/faiss/impl/HNSW.cpp +295 -16
  94. data/vendor/faiss/faiss/impl/HNSW.h +35 -6
  95. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
  96. data/vendor/faiss/faiss/impl/IDSelector.h +4 -4
  97. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
  98. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
  99. data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
  100. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  101. data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
  102. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  103. data/vendor/faiss/faiss/impl/Panorama.cpp +193 -0
  104. data/vendor/faiss/faiss/impl/Panorama.h +204 -0
  105. data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
  106. data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
  107. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
  108. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
  109. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
  110. data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
  111. data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
  112. data/vendor/faiss/faiss/impl/RaBitQStats.cpp +29 -0
  113. data/vendor/faiss/faiss/impl/RaBitQStats.h +56 -0
  114. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +294 -0
  115. data/vendor/faiss/faiss/impl/RaBitQUtils.h +330 -0
  116. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +304 -223
  117. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +72 -4
  118. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +362 -0
  119. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +112 -0
  120. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
  121. data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
  122. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +7 -10
  123. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +2 -4
  124. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
  125. data/vendor/faiss/faiss/impl/index_read.cpp +238 -10
  126. data/vendor/faiss/faiss/impl/index_write.cpp +212 -19
  127. data/vendor/faiss/faiss/impl/io.cpp +2 -2
  128. data/vendor/faiss/faiss/impl/io.h +4 -4
  129. data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
  130. data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
  131. data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
  132. data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
  133. data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
  134. data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
  135. data/vendor/faiss/faiss/impl/platform_macros.h +12 -0
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
  137. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
  138. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
  139. data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
  140. data/vendor/faiss/faiss/impl/svs_io.cpp +86 -0
  141. data/vendor/faiss/faiss/impl/svs_io.h +67 -0
  142. data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
  143. data/vendor/faiss/faiss/index_factory.cpp +217 -8
  144. data/vendor/faiss/faiss/index_factory.h +1 -1
  145. data/vendor/faiss/faiss/index_io.h +1 -1
  146. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +1 -1
  147. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  148. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +115 -1
  149. data/vendor/faiss/faiss/invlists/InvertedLists.h +46 -0
  150. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
  151. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  152. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +261 -0
  153. data/vendor/faiss/faiss/svs/IndexSVSFlat.cpp +117 -0
  154. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +66 -0
  155. data/vendor/faiss/faiss/svs/IndexSVSVamana.cpp +245 -0
  156. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +137 -0
  157. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.cpp +39 -0
  158. data/vendor/faiss/faiss/svs/IndexSVSVamanaLVQ.h +42 -0
  159. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +149 -0
  160. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +58 -0
  161. data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
  162. data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
  163. data/vendor/faiss/faiss/utils/Heap.h +3 -3
  164. data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
  165. data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
  166. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
  167. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
  168. data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
  169. data/vendor/faiss/faiss/utils/distances.cpp +0 -3
  170. data/vendor/faiss/faiss/utils/distances.h +2 -2
  171. data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
  172. data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
  173. data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
  174. data/vendor/faiss/faiss/utils/hamming.h +1 -1
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
  176. data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
  177. data/vendor/faiss/faiss/utils/partitioning.h +2 -2
  178. data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
  179. data/vendor/faiss/faiss/utils/random.cpp +1 -1
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
  181. data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
  183. data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
  184. data/vendor/faiss/faiss/utils/utils.cpp +9 -2
  185. data/vendor/faiss/faiss/utils/utils.h +2 -2
  186. metadata +29 -1
data/vendor/faiss/faiss/IndexRaBitQFastScan.h (new file)
@@ -0,0 +1,175 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+ #pragma once
+
+ #include <vector>
+
+ #include <faiss/IndexFastScan.h>
+ #include <faiss/IndexRaBitQ.h>
+ #include <faiss/impl/RaBitQStats.h>
+ #include <faiss/impl/RaBitQUtils.h>
+ #include <faiss/impl/RaBitQuantizer.h>
+ #include <faiss/impl/simd_result_handlers.h>
+ #include <faiss/utils/Heap.h>
+ #include <faiss/utils/simdlib.h>
+
+ namespace faiss {
+
+ // Import shared utilities from RaBitQUtils
+ using rabitq_utils::ExtraBitsFactors;
+ using rabitq_utils::QueryFactorsData;
+ using rabitq_utils::SignBitFactors;
+ using rabitq_utils::SignBitFactorsWithError;
+
+ /** Fast-scan version of RaBitQ index that processes 32 database vectors at a
+ * time using SIMD operations. Similar to IndexPQFastScan but adapted for
+ * RaBitQ's bit-level quantization with factors.
+ *
+ * The key differences from IndexRaBitQ:
+ * - Processes vectors in batches of 32
+ * - Uses 4-bit groupings for SIMD optimization (4 dimensions per 4-bit unit)
+ * - Separates factors from quantized bits for efficient processing
+ * - Leverages existing PQ4 FastScan infrastructure where possible
+ */
+ struct IndexRaBitQFastScan : IndexFastScan {
+ /// RaBitQ quantizer for encoding/decoding
+ RaBitQuantizer rabitq;
+
+ /// Center of all points (same as IndexRaBitQ)
+ std::vector<float> center;
+
+ /// Per-vector auxiliary data (1-bit codes stored separately in `codes`)
+ ///
+ /// 1-bit codes (sign bits) are stored in the inherited `codes` array from
+ /// IndexFastScan in packed FastScan format for SIMD processing.
+ ///
+ /// This flat_storage holds per-vector factors and refinement-bit codes:
+ /// Layout for 1-bit: [SignBitFactors (8 bytes)]
+ /// Layout for multi-bit: [SignBitFactorsWithError
+ /// (12B)][ref_codes][ExtraBitsFactors (8B)]
+ std::vector<uint8_t> flat_storage;
+
+ /// Default number of bits to quantize a query with
+ uint8_t qb = 8;
+
+ // quantize the query with a zero-centered scalar quantizer.
+ bool centered = false;
+
+ IndexRaBitQFastScan();
+
+ explicit IndexRaBitQFastScan(
+ idx_t d,
+ MetricType metric = METRIC_L2,
+ int bbs = 32,
+ uint8_t nb_bits = 1);
+
+ /// build from an existing IndexRaBitQ
+ explicit IndexRaBitQFastScan(const IndexRaBitQ& orig, int bbs = 32);
+
+ void train(idx_t n, const float* x) override;
+
+ void add(idx_t n, const float* x) override;
+
+ void compute_codes(uint8_t* codes, idx_t n, const float* x) const override;
+
+ /// Compute storage size per vector in flat_storage
+ size_t compute_per_vector_storage_size() const;
+
+ void compute_float_LUT(
+ float* lut,
+ idx_t n,
+ const float* x,
+ const FastScanDistancePostProcessing& context) const override;
+
+ void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
+
+ void search(
+ idx_t n,
+ const float* x,
+ idx_t k,
+ float* distances,
+ idx_t* labels,
+ const SearchParameters* params = nullptr) const override;
+
+ /// Override to create RaBitQ-specific handlers
+ SIMDResultHandlerToFloat* make_knn_handler(
+ bool is_max,
+ int /*impl*/,
+ idx_t n,
+ idx_t k,
+ size_t /*ntotal*/,
+ float* distances,
+ idx_t* labels,
+ const IDSelector* sel,
+ const FastScanDistancePostProcessing& context) const override;
+ };
+
+ /** SIMD result handler for RaBitQ FastScan that applies distance corrections
+ * and maintains heaps directly during SIMD operations.
+ *
+ * This handler processes batches of 32 distance computations from SIMD kernels,
+ * applies RaBitQ-specific adjustments (factors and normalizers), and
+ * immediately updates result heaps without intermediate storage. This
+ * eliminates the need for post-processing and provides significant memory and
+ * performance benefits.
+ *
+ * Key optimizations:
+ * - Direct heap integration (no intermediate result storage)
+ * - Batch-level computation of normalizers and query factors
+ * - Preserves exact mathematical equivalence to original RaBitQ distances
+ * - Runtime boolean for multi-bit support
+ *
+ * @tparam C Comparator type (CMin/CMax) for heap operations
+ * @tparam with_id_map Whether to use id mapping (similar to HeapHandler)
+ */
+ template <class C, bool with_id_map = false>
+ struct RaBitQHeapHandler
+ : simd_result_handlers::ResultHandlerCompare<C, with_id_map> {
+ using RHC = simd_result_handlers::ResultHandlerCompare<C, with_id_map>;
+ using RHC::normalizers;
+
+ const IndexRaBitQFastScan* rabitq_index;
+ float* heap_distances; // [nq * k]
+ int64_t* heap_labels; // [nq * k]
+ const size_t nq, k;
+ const FastScanDistancePostProcessing&
+ context; // Processing context with query offset
+ const bool is_multi_bit; // Runtime flag for multi-bit mode
+
+ // Use float-based comparator for heap operations
+ using Cfloat = typename std::conditional<
+ C::is_max,
+ CMax<float, int64_t>,
+ CMin<float, int64_t>>::type;
+
+ RaBitQHeapHandler(
+ const IndexRaBitQFastScan* index,
+ size_t nq_val,
+ size_t k_val,
+ float* distances,
+ int64_t* labels,
+ const IDSelector* sel_in,
+ const FastScanDistancePostProcessing& context,
+ bool multi_bit);
+
+ void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) override;
+
+ void begin(const float* norms);
+
+ void end();
+
+ private:
+ /// Compute full multi-bit distance for a candidate vector (multi-bit only)
+ float compute_full_multibit_distance(size_t db_idx, size_t q) const;
+
+ /// Compute lower bound using 1-bit distance and error bound (multi-bit
+ /// only)
+ float compute_lower_bound(float dist_1bit, size_t db_idx, size_t q) const;
+ };
+
+ } // namespace faiss
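
For orientation, a minimal usage sketch of the new index (not part of the diff; constructor arguments and method names are taken from the declarations above, everything else is illustrative):

#include <faiss/IndexRaBitQFastScan.h>
#include <random>
#include <vector>

int main() {
    const int d = 64;              // vector dimensionality
    const faiss::idx_t nb = 10000; // database size
    const faiss::idx_t k = 5;      // neighbors to return

    std::mt19937 rng(42);
    std::uniform_real_distribution<float> u(-1.0f, 1.0f);
    std::vector<float> xb(nb * d);
    for (float& v : xb) {
        v = u(rng);
    }

    // 1-bit RaBitQ codes, scanned 32 database vectors at a time (bbs = 32)
    faiss::IndexRaBitQFastScan index(d, faiss::METRIC_L2, 32, 1);
    index.train(nb, xb.data()); // presumably estimates `center`, as in IndexRaBitQ
    index.add(nb, xb.data());

    std::vector<float> distances(k);
    std::vector<faiss::idx_t> labels(k);
    index.search(1, xb.data(), k, distances.data(), labels.data());
    return 0;
}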
data/vendor/faiss/faiss/IndexRefine.cpp
@@ -341,4 +341,53 @@ void IndexRefineFlat::search(
  }
  }

+ /***************************************************
+ * IndexRefinePanorama
+ ***************************************************/
+
+ void IndexRefinePanorama::search(
+ idx_t n,
+ const float* x,
+ idx_t k,
+ float* distances,
+ idx_t* labels,
+ const SearchParameters* params_in) const {
+ const IndexRefineSearchParameters* params = nullptr;
+ if (params_in) {
+ params = dynamic_cast<const IndexRefineSearchParameters*>(params_in);
+ FAISS_THROW_IF_NOT_MSG(
+ params, "IndexRefineFlat params have incorrect type");
+ }
+
+ idx_t k_base = (params != nullptr) ? idx_t(k * params->k_factor)
+ : idx_t(k * k_factor);
+ SearchParameters* base_index_params =
+ (params != nullptr) ? params->base_index_params : nullptr;
+
+ FAISS_THROW_IF_NOT(k_base >= k);
+
+ FAISS_THROW_IF_NOT(base_index);
+ FAISS_THROW_IF_NOT(refine_index);
+
+ FAISS_THROW_IF_NOT(k > 0);
+ FAISS_THROW_IF_NOT(is_trained);
+
+ std::unique_ptr<idx_t[]> del1;
+ std::unique_ptr<float[]> del2;
+ idx_t* base_labels = new idx_t[n * k_base];
+ float* base_distances = new float[n * k_base];
+ del1.reset(base_labels);
+ del2.reset(base_distances);
+
+ base_index->search(
+ n, x, k_base, base_distances, base_labels, base_index_params);
+
+ for (int i = 0; i < n * k_base; i++) {
+ assert(base_labels[i] >= -1 && base_labels[i] < ntotal);
+ }
+
+ refine_index->search_subset(
+ n, x, k_base, base_labels, k, distances, labels);
+ }
+
  } // namespace faiss
data/vendor/faiss/faiss/IndexRefine.h
@@ -95,4 +95,21 @@ struct IndexRefineFlat : IndexRefine {
  const SearchParameters* params = nullptr) const override;
  };

+ /** Version where the search calls search_subset, allowing for Panorama
+ * refinement. */
+ struct IndexRefinePanorama : IndexRefine {
+ explicit IndexRefinePanorama(Index* base_index, Index* refine_index)
+ : IndexRefine(base_index, refine_index) {}
+
+ IndexRefinePanorama() : IndexRefine() {}
+
+ void search(
+ idx_t n,
+ const float* x,
+ idx_t k,
+ float* distances,
+ idx_t* labels,
+ const SearchParameters* params = nullptr) const override;
+ };
+
  } // namespace faiss
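
A hedged sketch of how the new class is wired up: construction is identical to IndexRefine, but search() re-ranks the k_factor * k base candidates through the refiner's search_subset(). Both indexes are assumed to be built over the same vectors, and the refiner is assumed to implement search_subset (the Panorama indexes added in this release are the intended refiners); the helper below is illustrative, not part of the diff.

#include <faiss/IndexRefine.h>

void panorama_refined_search(
        faiss::Index* base,   // fast, approximate first-pass index
        faiss::Index* refine, // same data, must support search_subset
        faiss::idx_t nq,
        const float* queries,
        faiss::idx_t k,
        float* distances,
        faiss::idx_t* labels) {
    faiss::IndexRefinePanorama index(base, refine);
    index.k_factor = 4; // fetch 4*k base candidates before refinement
    index.search(nq, queries, k, distances, labels);
}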
data/vendor/faiss/faiss/IndexShards.cpp
@@ -264,7 +264,7 @@ void IndexShardsTemplate<IndexT>::search(
  }
  }

- // explicit instanciations
+ // explicit instantiations
  template struct IndexShardsTemplate<Index>;
  template struct IndexShardsTemplate<IndexBinary>;

data/vendor/faiss/faiss/MatrixStats.cpp
@@ -77,7 +77,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
  if (d > 1024) {
  do_comment(
  "indexing this many dimensions is hard, "
- "please consider dimensionality reducution (with PCAMatrix)\n");
+ "please consider dimensionality reduction (with PCAMatrix)\n");
  }

  hash_value = hash_bytes((const uint8_t*)x, n * d * sizeof(*x));
@@ -125,7 +125,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
  }
  }

- // invalid vecor stats
+ // invalid vector stats
  if (n_valid == n) {
  do_comment("no NaN or Infs in data\n");
  } else {
@@ -229,7 +229,7 @@ MatrixStats::MatrixStats(size_t n, size_t d, const float* x) : n(n), d(d) {
  } else {
  do_comment(
  "%zd dimensions are too large "
- "wrt. their variance, may loose precision "
+ "wrt. their variance, may lose precision "
  "in IndexFlatL2 (use CenteringTransform)\n",
  n_dangerous_range);
  }
data/vendor/faiss/faiss/MetricType.h
@@ -35,7 +35,7 @@ enum MetricType {

  /// sum_i(min(a_i, b_i)) / sum_i(max(a_i, b_i)) where a_i, b_i > 0
  METRIC_Jaccard,
- /// Squared Eucliden distance, ignoring NaNs
+ /// Squared Euclidean distance, ignoring NaNs
  METRIC_NaNEuclidean,
  /// Gower's distance - numeric dimensions are in [0,1] and categorical
  /// dimensions are negative integers
data/vendor/faiss/faiss/VectorTransform.h
@@ -37,7 +37,7 @@ struct VectorTransform {
  * nothing by default.
  *
  * @param n nb of training vectors
- * @param x training vecors, size n * d
+ * @param x training vectors, size n * d
  */
  virtual void train(idx_t n, const float* x);

@@ -249,7 +249,7 @@ struct OPQMatrix : LinearTransform {
  void train(idx_t n, const float* x) override;
  };

- /** remap dimensions for intput vectors, possibly inserting 0s
+ /** remap dimensions for input vectors, possibly inserting 0s
  * strictly speaking this is also a linear transform but we don't want
  * to compute it with matrix multiplies */
  struct RemapDimensionsTransform : VectorTransform {
data/vendor/faiss/faiss/clone_index.cpp
@@ -23,6 +23,7 @@
  #include <faiss/IndexIVF.h>
  #include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
  #include <faiss/IndexIVFFlat.h>
+ #include <faiss/IndexIVFFlatPanorama.h>
  #include <faiss/IndexIVFPQ.h>
  #include <faiss/IndexIVFPQFastScan.h>
  #include <faiss/IndexIVFPQR.h>
@@ -97,6 +98,7 @@ IndexIVF* Cloner::clone_IndexIVF(const IndexIVF* ivf) {

  TRYCLONE(IndexIVFFlatDedup, ivf)
  TRYCLONE(IndexIVFFlat, ivf)
+ TRYCLONE(IndexIVFFlatPanorama, ivf)

  TRYCLONE(IndexIVFSpectralHash, ivf)

@@ -127,6 +129,7 @@ IndexIDMap* clone_IndexIDMap(const IndexIDMap* im) {

  IndexHNSW* clone_IndexHNSW(const IndexHNSW* ihnsw) {
  TRYCLONE(IndexHNSW2Level, ihnsw)
+ TRYCLONE(IndexHNSWFlatPanorama, ihnsw)
  TRYCLONE(IndexHNSWFlat, ihnsw)
  TRYCLONE(IndexHNSWPQ, ihnsw)
  TRYCLONE(IndexHNSWSQ, ihnsw)
@@ -152,7 +155,7 @@ IndexNSG* clone_IndexNSG(const IndexNSG* insg) {
  TRYCLONE(IndexNSGPQ, insg)
  TRYCLONE(IndexNSGSQ, insg)
  TRYCLONE(IndexNSG, insg) {
- FAISS_THROW_MSG("clone not supported for this type of IndexNNDescent");
+ FAISS_THROW_MSG("clone not supported for this type of IndexNSG");
  }
  }

@@ -274,6 +277,7 @@ Index* Cloner::clone_Index(const Index* index) {
  // IndexFlat
  TRYCLONE(IndexFlat1D, index)
  TRYCLONE(IndexFlatL2, index)
+ TRYCLONE(IndexFlatL2Panorama, index)
  TRYCLONE(IndexFlatIP, index)
  TRYCLONE(IndexFlat, index)

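The TRYCLONE entries above hook the Panorama classes into the generic deep-copy dispatch, so an ordinary clone_index() call now round-trips them like any other index type. A minimal sketch (clone_index is the existing top-level API; the helper is illustrative):

#include <faiss/clone_index.h>
#include <memory>

std::unique_ptr<faiss::Index> deep_copy(const faiss::Index* index) {
    // Dispatches through the TRYCLONE chains above, which now recognize
    // IndexFlatL2Panorama, IndexIVFFlatPanorama and IndexHNSWFlatPanorama.
    return std::unique_ptr<faiss::Index>(faiss::clone_index(index));
}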
data/vendor/faiss/faiss/gpu/GpuCloner.cpp
@@ -239,7 +239,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) {
  config.device = device;
  GpuIndexCagra* res =
  new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
- res->copyFromEx(icg, icg->get_numeric_type());
+ res->copyFrom_ex(icg, icg->get_numeric_type());
  return res;
  }
  #endif
data/vendor/faiss/faiss/gpu/GpuClonerOptions.h
@@ -21,8 +21,10 @@ struct GpuClonerOptions {
  /// is the coarse quantizer in float16?
  bool useFloat16CoarseQuantizer = false;

- /// for GpuIndexIVFFlat, is storage in float16?
  /// for GpuIndexIVFPQ, are intermediate calculations in float16?
+ /// Note: for float16 storage, use GpuIndexIVFScalarQuantizer
+ /// or cuVS, not GpuIndexIVFFlat. useFloat16 will not affect
+ /// GpuIndexIVFFlat storage.
  bool useFloat16 = false;

  /// use precomputed tables?
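
For context, these options are consumed by index_cpu_to_gpu(); a hedged sketch of the corrected semantics (the API calls are standard FAISS GPU entry points, the flag settings are illustrative):

#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/StandardGpuResources.h>

faiss::Index* clone_to_gpu(const faiss::Index* cpu_index) {
    static faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuClonerOptions opts;
    opts.useFloat16CoarseQuantizer = true; // coarse quantizer in fp16
    // Per the clarified comment: affects IVFPQ intermediate math only,
    // not GpuIndexIVFFlat storage.
    opts.useFloat16 = true;
    return faiss::gpu::index_cpu_to_gpu(&res, /*device=*/0, cpu_index, &opts);
}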
data/vendor/faiss/faiss/gpu/GpuIndex.h
@@ -77,13 +77,13 @@ class GpuIndex : public faiss::Index {
  /// as needed
  /// Handles paged adds if the add set is too large; calls addInternal_
  void add(idx_t, const float* x) override;
- void addEx(idx_t, const void* x, NumericType numeric_type) override;
+ void add_ex(idx_t, const void* x, NumericType numeric_type) override;

  /// `x` and `ids` can be resident on the CPU or any GPU; copies are
  /// performed as needed
  /// Handles paged adds if the add set is too large; calls addInternal_
  void add_with_ids(idx_t n, const float* x, const idx_t* ids) override;
- void add_with_idsEx(
+ void add_with_ids_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -103,7 +103,7 @@ class GpuIndex : public faiss::Index {
  float* distances,
  idx_t* labels,
  const SearchParameters* params = nullptr) const override;
- void searchEx(
+ void search_ex(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -139,7 +139,7 @@ class GpuIndex : public faiss::Index {
  protected:
  /// Copy what we need from the CPU equivalent
  void copyFrom(const faiss::Index* index);
- void copyFromEx(const faiss::Index* index, NumericType numeric_type) {
+ void copyFrom_ex(const faiss::Index* index, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  copyFrom(index);
  } else {
@@ -149,7 +149,7 @@ class GpuIndex : public faiss::Index {

  /// Copy what we have to the CPU equivalent
  void copyTo(faiss::Index* index) const;
- void copyToEx(faiss::Index* index, NumericType numeric_type) {
+ void copyTo_ex(faiss::Index* index, NumericType numeric_type) {
  if (numeric_type == NumericType::Float32) {
  copyTo(index);
  } else {
@@ -165,7 +165,7 @@ class GpuIndex : public faiss::Index {
  /// All data is guaranteed to be resident on our device
  virtual void addImpl_(idx_t n, const float* x, const idx_t* ids) = 0;

- virtual void addImplEx_(
+ virtual void addImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -187,7 +187,7 @@ class GpuIndex : public faiss::Index {
  idx_t* labels,
  const SearchParameters* params) const = 0;

- virtual void searchImplEx_(
+ virtual void searchImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -212,7 +212,7 @@ class GpuIndex : public faiss::Index {
  /// Handles paged adds if the add set is too large, passes to
  /// addImpl_ to actually perform the add for the current page
  void addPaged_(idx_t n, const float* x, const idx_t* ids);
- void addPagedEx_(
+ void addPaged_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -220,7 +220,7 @@ class GpuIndex : public faiss::Index {

  /// Calls addImpl_ for a single page of GPU-resident data
  void addPage_(idx_t n, const float* x, const idx_t* ids);
- void addPageEx_(
+ void addPage_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -235,7 +235,7 @@ class GpuIndex : public faiss::Index {
  idx_t* outIndicesData,
  const SearchParameters* params) const;

- void searchNonPagedEx_(
+ void searchNonPaged_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -253,7 +253,7 @@ class GpuIndex : public faiss::Index {
  float* outDistancesData,
  idx_t* outIndicesData,
  const SearchParameters* params) const;
- void searchFromCpuPagedEx_(
+ void searchFromCpuPaged_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
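
Every extended ("Ex") typed-data entry point in this header follows the same mechanical rename from camelCase Ex to snake_case _ex; argument lists are unchanged. A before/after sketch for callers written against the 0.4.x vendored FAISS (trailing arguments elided, exactly as in the signatures above):

// Before (0.4.x):  gpu_index->addEx(n, x, numeric_type);
//                  gpu_index->searchEx(n, x, numeric_type, /* ... */);
// After  (0.5.x):  gpu_index->add_ex(n, x, numeric_type);
//                  gpu_index->search_ex(n, x, numeric_type, /* ... */);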
data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h
@@ -111,7 +111,7 @@ struct GpuIndexBinaryCagra : public IndexBinary {
  const SearchParameters* search_params) const;

  protected:
- /// Manages streans, cuBLAS handles and scratch memory for devices
+ /// Manages streams, cuBLAS handles and scratch memory for devices
  std::shared_ptr<GpuResources> resources_;

  /// Configuration options
data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h
@@ -86,7 +86,7 @@ class GpuIndexBinaryFlat : public IndexBinary {
  idx_t* outIndicesData) const;

  protected:
- /// Manages streans, cuBLAS handles and scratch memory for devices
+ /// Manages streams, cuBLAS handles and scratch memory for devices
  std::shared_ptr<GpuResources> resources_;

  /// Configuration options
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h
@@ -99,7 +99,7 @@ struct IVFPQBuildCagraConfig {
  /// Note: if `dim` is not multiple of `pq_dim`, a random rotation is always
  /// applied to the input data and queries to transform the working space
  /// from `dim` to `rot_dim`, which may be slightly larger than the original
- /// space and and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
+ /// space and is a multiple of `pq_dim` (`rot_dim % pq_dim == 0`).
  /// However, this transform is not necessary when `dim` is multiple of
  /// `pq_dim`
  /// (`dim == rot_dim`, hence no need in adding "extra" data columns /
@@ -135,7 +135,7 @@ struct IVFPQSearchCagraConfig {
  ///
  /// The use of low-precision types reduces the amount of shared memory
  /// required at search time, so fast shared memory kernels can be used even
- /// for datasets with large dimansionality. Note that the recall is slightly
+ /// for datasets with large dimensionality. Note that the recall is slightly
  /// degraded when low-precision type is selected.

  cudaDataType_t lut_dtype = CUDA_R_32F;
@@ -166,6 +166,10 @@ struct IVFPQSearchCagraConfig {
  /// negative effects on the search performance if tweaked incorrectly.

  double preferred_shmem_carveout = 1.0;
+
+ /// Set the internal batch size to improve GPU utilization at the cost of
+ /// larger memory footprint.
+ uint32_t max_internal_batch_size = 4096;
  };

  struct GpuIndexCagraConfig : public GpuIndexConfig {
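
A sketch of the new knob (field name and default exactly as declared above; the value chosen here is illustrative):

faiss::gpu::IVFPQSearchCagraConfig search_cfg;
// Default is 4096; raising it can improve GPU utilization for large query
// batches at the cost of a larger memory footprint.
search_cfg.max_internal_batch_size = 8192;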
data/vendor/faiss/faiss/gpu/GpuIndexCagra.h (continued)
@@ -258,7 +262,7 @@ struct GpuIndexCagra : public GpuIndex {
  /// the base dataset. Use this function when you want to add vectors with
  /// ids. Ref: https://github.com/facebookresearch/faiss/issues/4107
  void add(idx_t n, const float* x) override;
- void addEx(idx_t n, const void* x, NumericType numeric_type) override;
+ void add_ex(idx_t n, const void* x, NumericType numeric_type) override;

  /// Trains CAGRA based on the given vector data.
  /// NB: The use of the train function here is to build the CAGRA graph on
@@ -266,12 +270,12 @@ struct GpuIndexCagra : public GpuIndex {
  /// of vectors (without IDs) to the index. There is no external quantizer to
  /// be trained here.
  void train(idx_t n, const float* x) override;
- void trainEx(idx_t n, const void* x, NumericType numeric_type) override;
+ void train_ex(idx_t n, const void* x, NumericType numeric_type) override;

  /// Initialize ourselves from the given CPU index; will overwrite
  /// all data in ourselves
  void copyFrom(const faiss::IndexHNSWCagra* index);
- void copyFromEx(
+ void copyFrom_ex(
  const faiss::IndexHNSWCagra* index,
  NumericType numeric_type);

@@ -289,7 +293,7 @@ struct GpuIndexCagra : public GpuIndex {
  bool addImplRequiresIDs_() const override;

  void addImpl_(idx_t n, const float* x, const idx_t* ids) override;
- void addImplEx_(
+ void addImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
@@ -303,7 +307,7 @@ struct GpuIndexCagra : public GpuIndex {
  float* distances,
  idx_t* labels,
  const SearchParameters* search_params) const override;
- void searchImplEx_(
+ void searchImpl_ex_(
  idx_t n,
  const void* x,
  NumericType numeric_type,
data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp
@@ -700,7 +700,7 @@ StandardGpuResourcesImpl::getMemoryInfo() const {
  //

  StandardGpuResources::StandardGpuResources()
- : res_(new StandardGpuResourcesImpl) {}
+ : res_(std::make_shared<StandardGpuResourcesImpl>()) {}

  StandardGpuResources::~StandardGpuResources() = default;

data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h
@@ -5,6 +5,8 @@
  * LICENSE file in the root directory of this source tree.
  */

+ #pragma once
+
  #include <faiss/impl/FaissAssert.h>

  namespace faiss {
data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp
@@ -1,3 +1,10 @@
+ /*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
  #include <faiss/gpu/GpuIcmEncoder.h>
  #include <faiss/gpu/StandardGpuResources.h>
  #include <faiss/gpu/test/TestUtils.h>
data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp
@@ -493,7 +493,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) {
  // Construct a positive test set
  auto queryVecs = faiss::gpu::randVecs(opt.numQuery, opt.dim);

- // Put all vecs on positive size
+ // Put all vecs on positive side
  for (auto& f : queryVecs) {
  f = std::abs(f);
  }
data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp
@@ -404,7 +404,7 @@ void AdditiveQuantizer::compute_LUT(
  namespace {

  /* compute inner products of one query with all centroids, given a look-up
- * table of all inner producst with codebook entries */
+ * table of all inner products with codebook entries */
  void compute_inner_prod_with_LUT(
  const AdditiveQuantizer& aq,
  const float* LUT,
data/vendor/faiss/faiss/impl/AdditiveQuantizer.h
@@ -212,7 +212,7 @@ struct AdditiveQuantizer : Quantizer {
  idx_t* labels,
  const float* centroid_norms) const;

- virtual ~AdditiveQuantizer();
+ virtual ~AdditiveQuantizer() override;
  };

  } // namespace faiss
data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp
@@ -36,7 +36,7 @@ RangeSearchResult::RangeSearchResult(size_t nq, bool alloc_lims) : nq(nq) {
  /// for each query
  void RangeSearchResult::do_allocation() {
  // works only if all the partial results are aggregated
- // simulatenously
+ // simultaneously
  FAISS_THROW_IF_NOT(labels == nullptr && distances == nullptr);
  size_t ofs = 0;
  for (int i = 0; i < nq; i++) {
@@ -86,7 +86,7 @@ void BufferList::append_buffer() {
  wp = 0;
  }

- /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
+ /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
  /// tables dest_ids, dest_dis
  void BufferList::copy_range(
  size_t ofs,
data/vendor/faiss/faiss/impl/AuxIndexStructures.h
@@ -80,7 +80,7 @@ struct BufferList {
  /// add one result, possibly appending a new buffer if needed
  void add(idx_t id, float dis);

- /// copy elemnts ofs:ofs+n-1 seen as linear data in the buffers to
+ /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
  /// tables dest_ids, dest_dis
  void copy_range(size_t ofs, size_t n, idx_t* dest_ids, float* dest_dis);
  };