faiss 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/faiss/version.rb +1 -1
  4. data/vendor/faiss/faiss/AutoTune.cpp +39 -29
  5. data/vendor/faiss/faiss/Clustering.cpp +4 -2
  6. data/vendor/faiss/faiss/IVFlib.cpp +14 -7
  7. data/vendor/faiss/faiss/Index.h +72 -3
  8. data/vendor/faiss/faiss/Index2Layer.cpp +2 -4
  9. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +0 -1
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +1 -0
  11. data/vendor/faiss/faiss/IndexBinary.h +46 -3
  12. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +118 -4
  13. data/vendor/faiss/faiss/IndexBinaryHNSW.h +41 -0
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +18 -7
  16. data/vendor/faiss/faiss/IndexBinaryIVF.h +5 -1
  17. data/vendor/faiss/faiss/IndexFlat.cpp +6 -4
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +65 -24
  19. data/vendor/faiss/faiss/IndexHNSW.h +10 -1
  20. data/vendor/faiss/faiss/IndexIDMap.cpp +96 -18
  21. data/vendor/faiss/faiss/IndexIDMap.h +20 -0
  22. data/vendor/faiss/faiss/IndexIVF.cpp +28 -10
  23. data/vendor/faiss/faiss/IndexIVF.h +16 -1
  24. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -16
  25. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +18 -6
  26. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +33 -21
  27. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +16 -6
  28. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +24 -15
  29. data/vendor/faiss/faiss/IndexIVFFastScan.h +4 -2
  30. data/vendor/faiss/faiss/IndexIVFFlat.cpp +59 -43
  31. data/vendor/faiss/faiss/IndexIVFFlat.h +10 -2
  32. data/vendor/faiss/faiss/IndexIVFPQ.cpp +16 -3
  33. data/vendor/faiss/faiss/IndexIVFPQ.h +8 -1
  34. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +14 -6
  35. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +2 -1
  36. data/vendor/faiss/faiss/IndexIVFPQR.cpp +14 -4
  37. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  38. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +28 -3
  39. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +8 -1
  40. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +9 -2
  41. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  42. data/vendor/faiss/faiss/IndexLattice.cpp +8 -4
  43. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -7
  44. data/vendor/faiss/faiss/IndexNSG.cpp +3 -3
  45. data/vendor/faiss/faiss/IndexPQ.cpp +0 -1
  46. data/vendor/faiss/faiss/IndexPQ.h +1 -0
  47. data/vendor/faiss/faiss/IndexPQFastScan.cpp +0 -2
  48. data/vendor/faiss/faiss/IndexPreTransform.cpp +4 -2
  49. data/vendor/faiss/faiss/IndexRefine.cpp +11 -6
  50. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +16 -4
  51. data/vendor/faiss/faiss/IndexScalarQuantizer.h +10 -3
  52. data/vendor/faiss/faiss/IndexShards.cpp +7 -6
  53. data/vendor/faiss/faiss/MatrixStats.cpp +16 -8
  54. data/vendor/faiss/faiss/MetaIndexes.cpp +12 -6
  55. data/vendor/faiss/faiss/MetricType.h +5 -3
  56. data/vendor/faiss/faiss/clone_index.cpp +2 -4
  57. data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +6 -0
  58. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +9 -4
  59. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +32 -10
  60. data/vendor/faiss/faiss/gpu/GpuIndex.h +88 -0
  61. data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +125 -0
  62. data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +39 -4
  63. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +3 -3
  64. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -1
  65. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +3 -2
  66. data/vendor/faiss/faiss/gpu/utils/CuvsFilterConvert.h +41 -0
  67. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +6 -3
  68. data/vendor/faiss/faiss/impl/HNSW.cpp +34 -19
  69. data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -1
  70. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +2 -3
  71. data/vendor/faiss/faiss/impl/NNDescent.cpp +17 -9
  72. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +42 -21
  73. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +6 -24
  74. data/vendor/faiss/faiss/impl/ResultHandler.h +56 -47
  75. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +28 -15
  76. data/vendor/faiss/faiss/impl/index_read.cpp +36 -11
  77. data/vendor/faiss/faiss/impl/index_write.cpp +19 -6
  78. data/vendor/faiss/faiss/impl/io.cpp +9 -5
  79. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +18 -11
  80. data/vendor/faiss/faiss/impl/mapped_io.cpp +4 -7
  81. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +0 -1
  82. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +0 -1
  83. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +6 -6
  84. data/vendor/faiss/faiss/impl/zerocopy_io.cpp +1 -1
  85. data/vendor/faiss/faiss/impl/zerocopy_io.h +2 -2
  86. data/vendor/faiss/faiss/index_factory.cpp +49 -33
  87. data/vendor/faiss/faiss/index_factory.h +8 -2
  88. data/vendor/faiss/faiss/index_io.h +0 -3
  89. data/vendor/faiss/faiss/invlists/DirectMap.cpp +2 -1
  90. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +12 -6
  91. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +8 -4
  92. data/vendor/faiss/faiss/utils/Heap.cpp +15 -8
  93. data/vendor/faiss/faiss/utils/Heap.h +23 -12
  94. data/vendor/faiss/faiss/utils/distances.cpp +42 -21
  95. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +2 -2
  96. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +1 -1
  97. data/vendor/faiss/faiss/utils/distances_simd.cpp +5 -3
  98. data/vendor/faiss/faiss/utils/extra_distances-inl.h +27 -4
  99. data/vendor/faiss/faiss/utils/extra_distances.cpp +8 -4
  100. data/vendor/faiss/faiss/utils/hamming.cpp +20 -10
  101. data/vendor/faiss/faiss/utils/partitioning.cpp +8 -4
  102. data/vendor/faiss/faiss/utils/quantize_lut.cpp +17 -9
  103. data/vendor/faiss/faiss/utils/rabitq_simd.h +539 -0
  104. data/vendor/faiss/faiss/utils/random.cpp +14 -7
  105. data/vendor/faiss/faiss/utils/utils.cpp +0 -3
  106. metadata +5 -2
@@ -12,11 +12,11 @@
12
12
  #include <cinttypes>
13
13
  #include <cstdint>
14
14
  #include <cstdio>
15
+ #include "faiss/Index.h"
15
16
 
16
17
  #include <faiss/impl/AuxIndexStructures.h>
17
18
  #include <faiss/impl/FaissAssert.h>
18
19
  #include <faiss/utils/Heap.h>
19
- #include <faiss/utils/WorkerThread.h>
20
20
 
21
21
  namespace faiss {
22
22
 
@@ -33,6 +33,17 @@ void sync_d(IndexBinary* index) {
33
33
 
34
34
  } // anonymous namespace
35
35
 
36
+ template <typename componentT>
37
+ NumericType component_t_to_numeric() {
38
+ if constexpr (std::is_same<componentT, float>::value) {
39
+ return NumericType::Float32;
40
+ } else if constexpr (std::is_same<componentT, uint8_t>::value) {
41
+ return NumericType::UInt8;
42
+ } else {
43
+ FAISS_THROW_MSG("Unsupported component_t");
44
+ }
45
+ }
46
+
36
47
  /*****************************************************
37
48
  * IndexIDMap implementation
38
49
  *******************************************************/
@@ -47,6 +58,16 @@ IndexIDMapTemplate<IndexT>::IndexIDMapTemplate(IndexT* index) : index(index) {
47
58
  sync_d(this);
48
59
  }
49
60
 
61
+ template <typename IndexT>
62
+ void IndexIDMapTemplate<IndexT>::addEx(
63
+ idx_t,
64
+ const void*,
65
+ NumericType numeric_type) {
66
+ FAISS_THROW_MSG(
67
+ "add does not make sense with IndexIDMap, "
68
+ "use add_with_ids");
69
+ }
70
+
50
71
  template <typename IndexT>
51
72
  void IndexIDMapTemplate<IndexT>::add(
52
73
  idx_t,
@@ -56,12 +77,22 @@ void IndexIDMapTemplate<IndexT>::add(
56
77
  "use add_with_ids");
57
78
  }
58
79
 
80
+ template <typename IndexT>
81
+ void IndexIDMapTemplate<IndexT>::trainEx(
82
+ idx_t n,
83
+ const void* x,
84
+ NumericType numeric_type) {
85
+ index->trainEx(n, x, numeric_type);
86
+ this->is_trained = index->is_trained;
87
+ }
88
+
59
89
  template <typename IndexT>
60
90
  void IndexIDMapTemplate<IndexT>::train(
61
91
  idx_t n,
62
92
  const typename IndexT::component_t* x) {
63
- index->train(n, x);
64
- this->is_trained = index->is_trained;
93
+ trainEx(n,
94
+ static_cast<const void*>(x),
95
+ component_t_to_numeric<typename IndexT::component_t>());
65
96
  }
66
97
 
67
98
  template <typename IndexT>
@@ -72,16 +103,30 @@ void IndexIDMapTemplate<IndexT>::reset() {
72
103
  }
73
104
 
74
105
  template <typename IndexT>
75
- void IndexIDMapTemplate<IndexT>::add_with_ids(
106
+ void IndexIDMapTemplate<IndexT>::add_with_idsEx(
76
107
  idx_t n,
77
- const typename IndexT::component_t* x,
108
+ const void* x,
109
+ NumericType numeric_type,
78
110
  const idx_t* xids) {
79
- index->add(n, x);
80
- for (idx_t i = 0; i < n; i++)
111
+ index->addEx(n, x, numeric_type);
112
+ for (idx_t i = 0; i < n; i++) {
81
113
  id_map.push_back(xids[i]);
114
+ }
82
115
  this->ntotal = index->ntotal;
83
116
  }
84
117
 
118
+ template <typename IndexT>
119
+ void IndexIDMapTemplate<IndexT>::add_with_ids(
120
+ idx_t n,
121
+ const typename IndexT::component_t* x,
122
+ const idx_t* xids) {
123
+ add_with_idsEx(
124
+ n,
125
+ static_cast<const void*>(x),
126
+ component_t_to_numeric<typename IndexT::component_t>(),
127
+ xids);
128
+ }
129
+
85
130
  template <typename IndexT>
86
131
  size_t IndexIDMapTemplate<IndexT>::sa_code_size() const {
87
132
  return index->sa_code_size();
@@ -106,10 +151,10 @@ struct ScopedSelChange {
106
151
  SearchParameters* params = nullptr;
107
152
  IDSelector* old_sel = nullptr;
108
153
 
109
- void set(SearchParameters* params_2, IDSelector* new_sel) {
110
- this->params = params_2;
111
- old_sel = params_2->sel;
112
- params_2->sel = new_sel;
154
+ void set(SearchParameters* target_params, IDSelector* new_sel) {
155
+ this->params = target_params;
156
+ old_sel = target_params->sel;
157
+ target_params->sel = new_sel;
113
158
  }
114
159
  ~ScopedSelChange() {
115
160
  if (params) {
@@ -121,9 +166,10 @@ struct ScopedSelChange {
121
166
  } // namespace
122
167
 
123
168
  template <typename IndexT>
124
- void IndexIDMapTemplate<IndexT>::search(
169
+ void IndexIDMapTemplate<IndexT>::searchEx(
125
170
  idx_t n,
126
- const typename IndexT::component_t* x,
171
+ const void* x,
172
+ NumericType numeric_type,
127
173
  idx_t k,
128
174
  typename IndexT::distance_t* distances,
129
175
  idx_t* labels,
@@ -147,7 +193,7 @@ void IndexIDMapTemplate<IndexT>::search(
147
193
  sel_change.set(params_non_const, &this_idtrans);
148
194
  }
149
195
  }
150
- index->search(n, x, k, distances, labels, params);
196
+ index->searchEx(n, x, numeric_type, k, distances, labels, params);
151
197
  idx_t* li = labels;
152
198
  #pragma omp parallel for
153
199
  for (idx_t i = 0; i < n * k; i++) {
@@ -155,6 +201,24 @@ void IndexIDMapTemplate<IndexT>::search(
155
201
  }
156
202
  }
157
203
 
204
+ template <typename IndexT>
205
+ void IndexIDMapTemplate<IndexT>::search(
206
+ idx_t n,
207
+ const typename IndexT::component_t* x,
208
+ idx_t k,
209
+ typename IndexT::distance_t* distances,
210
+ idx_t* labels,
211
+ const SearchParameters* params) const {
212
+ searchEx(
213
+ n,
214
+ static_cast<const void*>(x),
215
+ component_t_to_numeric<typename IndexT::component_t>(),
216
+ k,
217
+ distances,
218
+ labels,
219
+ params);
220
+ }
221
+
158
222
  template <typename IndexT>
159
223
  void IndexIDMapTemplate<IndexT>::range_search(
160
224
  idx_t n,
@@ -223,8 +287,9 @@ void IndexIDMapTemplate<IndexT>::merge_from(IndexT& otherIndex, idx_t add_id) {
223
287
 
224
288
  template <typename IndexT>
225
289
  IndexIDMapTemplate<IndexT>::~IndexIDMapTemplate() {
226
- if (own_fields)
290
+ if (own_fields) {
227
291
  delete index;
292
+ }
228
293
  }
229
294
 
230
295
  /*****************************************************
@@ -236,17 +301,30 @@ IndexIDMap2Template<IndexT>::IndexIDMap2Template(IndexT* index)
236
301
  : IndexIDMapTemplate<IndexT>(index) {}
237
302
 
238
303
  template <typename IndexT>
239
- void IndexIDMap2Template<IndexT>::add_with_ids(
304
+ void IndexIDMap2Template<IndexT>::add_with_idsEx(
240
305
  idx_t n,
241
- const typename IndexT::component_t* x,
306
+ const void* x,
307
+ NumericType numeric_type,
242
308
  const idx_t* xids) {
243
309
  size_t prev_ntotal = this->ntotal;
244
- IndexIDMapTemplate<IndexT>::add_with_ids(n, x, xids);
310
+ IndexIDMapTemplate<IndexT>::add_with_idsEx(n, x, numeric_type, xids);
245
311
  for (size_t i = prev_ntotal; i < this->ntotal; i++) {
246
312
  rev_map[this->id_map[i]] = i;
247
313
  }
248
314
  }
249
315
 
316
+ template <typename IndexT>
317
+ void IndexIDMap2Template<IndexT>::add_with_ids(
318
+ idx_t n,
319
+ const typename IndexT::component_t* x,
320
+ const idx_t* xids) {
321
+ add_with_idsEx(
322
+ n,
323
+ static_cast<const void*>(x),
324
+ component_t_to_numeric<typename IndexT::component_t>(),
325
+ xids);
326
+ }
327
+
250
328
  template <typename IndexT>
251
329
  void IndexIDMap2Template<IndexT>::check_consistency() const {
252
330
  FAISS_THROW_IF_NOT(rev_map.size() == this->id_map.size());
@@ -31,9 +31,15 @@ struct IndexIDMapTemplate : IndexT {
31
31
  /// @param xids if non-null, ids to store for the vectors (size n)
32
32
  void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
33
33
  override;
34
+ void add_with_idsEx(
35
+ idx_t n,
36
+ const void* x,
37
+ NumericType numeric_type,
38
+ const idx_t* xids) override;
34
39
 
35
40
  /// this will fail. Use add_with_ids
36
41
  void add(idx_t n, const component_t* x) override;
42
+ void addEx(idx_t n, const void* x, NumericType numeric_type) override;
37
43
 
38
44
  void search(
39
45
  idx_t n,
@@ -42,8 +48,17 @@ struct IndexIDMapTemplate : IndexT {
42
48
  distance_t* distances,
43
49
  idx_t* labels,
44
50
  const SearchParameters* params = nullptr) const override;
51
+ void searchEx(
52
+ idx_t n,
53
+ const void* x,
54
+ NumericType numeric_type,
55
+ idx_t k,
56
+ distance_t* distances,
57
+ idx_t* labels,
58
+ const SearchParameters* params = nullptr) const override;
45
59
 
46
60
  void train(idx_t n, const component_t* x) override;
61
+ void trainEx(idx_t n, const void* x, NumericType numeric_type) override;
47
62
 
48
63
  void reset() override;
49
64
 
@@ -89,6 +104,11 @@ struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
89
104
 
90
105
  void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
91
106
  override;
107
+ void add_with_idsEx(
108
+ idx_t n,
109
+ const void* x,
110
+ NumericType numeric_type,
111
+ const idx_t* xids) override;
92
112
 
93
113
  size_t remove_ids(const IDSelector& sel) override;
94
114
 
@@ -60,19 +60,22 @@ void Level1Quantizer::train_q1(
60
60
  MetricType metric_type) {
61
61
  size_t d = quantizer->d;
62
62
  if (quantizer->is_trained && (quantizer->ntotal == nlist)) {
63
- if (verbose)
63
+ if (verbose) {
64
64
  printf("IVF quantizer does not need training.\n");
65
+ }
65
66
  } else if (quantizer_trains_alone == 1) {
66
- if (verbose)
67
+ if (verbose) {
67
68
  printf("IVF quantizer trains alone...\n");
69
+ }
68
70
  quantizer->verbose = verbose;
69
71
  quantizer->train(n, x);
70
72
  FAISS_THROW_IF_NOT_MSG(
71
73
  quantizer->ntotal == nlist,
72
74
  "nlist not consistent with quantizer size");
73
75
  } else if (quantizer_trains_alone == 0) {
74
- if (verbose)
76
+ if (verbose) {
75
77
  printf("Training level-1 quantizer on %zd vectors in %zdD\n", n, d);
78
+ }
76
79
 
77
80
  Clustering clus(d, nlist, cp);
78
81
  quantizer->reset();
@@ -158,11 +161,14 @@ IndexIVF::IndexIVF(
158
161
  size_t d,
159
162
  size_t nlist,
160
163
  size_t code_size,
161
- MetricType metric)
164
+ MetricType metric,
165
+ bool own_invlists)
162
166
  : Index(d, metric),
163
167
  IndexIVFInterface(quantizer, nlist),
164
- invlists(new ArrayInvertedLists(nlist, code_size)),
165
- own_invlists(true),
168
+ invlists(
169
+ own_invlists ? new ArrayInvertedLists(nlist, code_size)
170
+ : nullptr),
171
+ own_invlists(own_invlists),
166
172
  code_size(code_size) {
167
173
  FAISS_THROW_IF_NOT(d == quantizer->d);
168
174
  is_trained = quantizer->is_trained && (quantizer->ntotal == nlist);
@@ -230,8 +236,9 @@ void IndexIVF::add_core(
230
236
  size_t nadd = 0, nminus1 = 0;
231
237
 
232
238
  for (size_t i = 0; i < n; i++) {
233
- if (coarse_idx[i] < 0)
239
+ if (coarse_idx[i] < 0) {
234
240
  nminus1++;
241
+ }
235
242
  }
236
243
 
237
244
  std::unique_ptr<uint8_t[]> flat_codes(new uint8_t[n * code_size]);
@@ -466,8 +473,9 @@ void IndexIVF::search_preassigned(
466
473
  // initialize + reorder a result heap
467
474
 
468
475
  auto init_result = [&](float* simi, idx_t* idxi) {
469
- if (!do_heap_init)
476
+ if (!do_heap_init) {
470
477
  return;
478
+ }
471
479
  if (metric_type == METRIC_INNER_PRODUCT) {
472
480
  heap_heapify<HeapForIP>(k, simi, idxi);
473
481
  } else {
@@ -487,8 +495,9 @@ void IndexIVF::search_preassigned(
487
495
  };
488
496
 
489
497
  auto reorder_result = [&](float* simi, idx_t* idxi) {
490
- if (!do_heap_init)
498
+ if (!do_heap_init) {
491
499
  return;
500
+ }
492
501
  if (metric_type == METRIC_INNER_PRODUCT) {
493
502
  heap_reorder<HeapForIP>(k, simi, idxi);
494
503
  } else {
@@ -804,8 +813,9 @@ void IndexIVF::range_search_preassigned(
804
813
 
805
814
  auto scan_list_func = [&](size_t i, size_t ik, RangeQueryResult& qres) {
806
815
  idx_t key = keys[i * nprobe + ik]; /* select the list */
807
- if (key < 0)
816
+ if (key < 0) {
808
817
  return;
818
+ }
809
819
  FAISS_THROW_IF_NOT_FMT(
810
820
  key < (idx_t)nlist,
811
821
  "Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n",
@@ -956,6 +966,14 @@ bool IndexIVF::check_ids_sorted() const {
956
966
  return nflip == 0;
957
967
  }
958
968
 
969
+ void IndexIVF::decode_vectors(
970
+ idx_t /*n*/,
971
+ const uint8_t* /*codes*/,
972
+ const idx_t* /*list_nos*/,
973
+ float* /*x*/) const {
974
+ FAISS_THROW_MSG("decode_vectors not implemented");
975
+ }
976
+
959
977
  /* standalone codec interface */
960
978
  size_t IndexIVF::sa_code_size() const {
961
979
  size_t coarse_size = coarse_code_size();
@@ -210,7 +210,8 @@ struct IndexIVF : Index, IndexIVFInterface {
210
210
  size_t d,
211
211
  size_t nlist,
212
212
  size_t code_size,
213
- MetricType metric = METRIC_L2);
213
+ MetricType metric = METRIC_L2,
214
+ bool own_invlists = true);
214
215
 
215
216
  void reset() override;
216
217
 
@@ -253,6 +254,20 @@ struct IndexIVF : Index, IndexIVFInterface {
253
254
  uint8_t* codes,
254
255
  bool include_listno = false) const = 0;
255
256
 
257
+ /** Decodes a set of vectors as they would appear in a given set of inverted
258
+ * lists (inverse of encode_vectors)
259
+ *
260
+ * @param codes input codes, size n * code_size
261
+ * @param x output decoded vectors
262
+ * @param list_nos input listnos, size n
263
+ *
264
+ */
265
+ virtual void decode_vectors(
266
+ idx_t n,
267
+ const uint8_t* codes,
268
+ const idx_t* list_nos,
269
+ float* x) const;
270
+
256
271
  /** Add vectors that are computed with the standalone codec
257
272
  *
258
273
  * @param codes codes to add size n * sa_code_size()
@@ -28,8 +28,9 @@ IndexIVFAdditiveQuantizer::IndexIVFAdditiveQuantizer(
28
28
  Index* quantizer,
29
29
  size_t d,
30
30
  size_t nlist,
31
- MetricType metric)
32
- : IndexIVF(quantizer, d, nlist, 0, metric), aq(aq) {
31
+ MetricType metric,
32
+ bool own_invlists)
33
+ : IndexIVF(quantizer, d, nlist, 0, metric, own_invlists), aq(aq) {
33
34
  by_residual = true;
34
35
  }
35
36
 
@@ -89,6 +90,31 @@ void IndexIVFAdditiveQuantizer::encode_vectors(
89
90
  }
90
91
  }
91
92
 
93
+ void IndexIVFAdditiveQuantizer::decode_vectors(
94
+ idx_t n,
95
+ const uint8_t* codes,
96
+ const idx_t* listnos,
97
+ float* x) const {
98
+ #pragma omp parallel if (n > 1000)
99
+ {
100
+ std::vector<float> residual(d);
101
+
102
+ #pragma omp for
103
+ for (idx_t i = 0; i < n; i++) {
104
+ const uint8_t* code = codes + i * (code_size);
105
+ float* xi = x + i * d;
106
+ aq->decode(code, xi, 1);
107
+ if (by_residual) {
108
+ int64_t list_no = listnos[i];
109
+ quantizer->reconstruct(list_no, residual.data());
110
+ for (size_t j = 0; j < d; j++) {
111
+ xi[j] += residual[j];
112
+ }
113
+ }
114
+ }
115
+ }
116
+ }
117
+
92
118
  void IndexIVFAdditiveQuantizer::sa_decode(
93
119
  idx_t n,
94
120
  const uint8_t* codes,
@@ -301,10 +327,20 @@ IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
301
327
  size_t nlist,
302
328
  const std::vector<size_t>& nbits,
303
329
  MetricType metric,
304
- Search_type_t search_type)
305
- : IndexIVFAdditiveQuantizer(&rq, quantizer, d, nlist, metric),
330
+ Search_type_t search_type,
331
+ bool own_invlists)
332
+ : IndexIVFAdditiveQuantizer(
333
+ &rq,
334
+ quantizer,
335
+ d,
336
+ nlist,
337
+ metric,
338
+ own_invlists),
306
339
  rq(d, nbits, search_type) {
307
- code_size = invlists->code_size = rq.code_size;
340
+ code_size = rq.code_size;
341
+ if (invlists) {
342
+ invlists->code_size = code_size;
343
+ }
308
344
  }
309
345
 
310
346
  IndexIVFResidualQuantizer::IndexIVFResidualQuantizer()
@@ -317,14 +353,16 @@ IndexIVFResidualQuantizer::IndexIVFResidualQuantizer(
317
353
  size_t M, /* number of subquantizers */
318
354
  size_t nbits, /* number of bit per subvector index */
319
355
  MetricType metric,
320
- Search_type_t search_type)
356
+ Search_type_t search_type,
357
+ bool own_invlists)
321
358
  : IndexIVFResidualQuantizer(
322
359
  quantizer,
323
360
  d,
324
361
  nlist,
325
362
  std::vector<size_t>(M, nbits),
326
363
  metric,
327
- search_type) {}
364
+ search_type,
365
+ own_invlists) {}
328
366
 
329
367
  IndexIVFResidualQuantizer::~IndexIVFResidualQuantizer() = default;
330
368
 
@@ -339,10 +377,20 @@ IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer(
339
377
  size_t M, /* number of subquantizers */
340
378
  size_t nbits, /* number of bit per subvector index */
341
379
  MetricType metric,
342
- Search_type_t search_type)
343
- : IndexIVFAdditiveQuantizer(&lsq, quantizer, d, nlist, metric),
380
+ Search_type_t search_type,
381
+ bool own_invlists)
382
+ : IndexIVFAdditiveQuantizer(
383
+ &lsq,
384
+ quantizer,
385
+ d,
386
+ nlist,
387
+ metric,
388
+ own_invlists),
344
389
  lsq(d, M, nbits, search_type) {
345
- code_size = invlists->code_size = lsq.code_size;
390
+ code_size = lsq.code_size;
391
+ if (invlists) {
392
+ invlists->code_size = code_size;
393
+ }
346
394
  }
347
395
 
348
396
  IndexIVFLocalSearchQuantizer::IndexIVFLocalSearchQuantizer()
@@ -362,10 +410,20 @@ IndexIVFProductResidualQuantizer::IndexIVFProductResidualQuantizer(
362
410
  size_t Msub,
363
411
  size_t nbits,
364
412
  MetricType metric,
365
- Search_type_t search_type)
366
- : IndexIVFAdditiveQuantizer(&prq, quantizer, d, nlist, metric),
413
+ Search_type_t search_type,
414
+ bool own_invlists)
415
+ : IndexIVFAdditiveQuantizer(
416
+ &prq,
417
+ quantizer,
418
+ d,
419
+ nlist,
420
+ metric,
421
+ own_invlists),
367
422
  prq(d, nsplits, Msub, nbits, search_type) {
368
- code_size = invlists->code_size = prq.code_size;
423
+ code_size = prq.code_size;
424
+ if (invlists) {
425
+ invlists->code_size = code_size;
426
+ }
369
427
  }
370
428
 
371
429
  IndexIVFProductResidualQuantizer::IndexIVFProductResidualQuantizer()
@@ -385,10 +443,20 @@ IndexIVFProductLocalSearchQuantizer::IndexIVFProductLocalSearchQuantizer(
385
443
  size_t Msub,
386
444
  size_t nbits,
387
445
  MetricType metric,
388
- Search_type_t search_type)
389
- : IndexIVFAdditiveQuantizer(&plsq, quantizer, d, nlist, metric),
446
+ Search_type_t search_type,
447
+ bool own_invlists)
448
+ : IndexIVFAdditiveQuantizer(
449
+ &plsq,
450
+ quantizer,
451
+ d,
452
+ nlist,
453
+ metric,
454
+ own_invlists),
390
455
  plsq(d, nsplits, Msub, nbits, search_type) {
391
- code_size = invlists->code_size = plsq.code_size;
456
+ code_size = plsq.code_size;
457
+ if (invlists) {
458
+ invlists->code_size = code_size;
459
+ }
392
460
  }
393
461
 
394
462
  IndexIVFProductLocalSearchQuantizer::IndexIVFProductLocalSearchQuantizer()
@@ -35,7 +35,8 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
35
35
  Index* quantizer,
36
36
  size_t d,
37
37
  size_t nlist,
38
- MetricType metric = METRIC_L2);
38
+ MetricType metric = METRIC_L2,
39
+ bool own_invlists = true);
39
40
 
40
41
  explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
41
42
 
@@ -50,6 +51,12 @@ struct IndexIVFAdditiveQuantizer : IndexIVF {
50
51
  uint8_t* codes,
51
52
  bool include_listnos = false) const override;
52
53
 
54
+ void decode_vectors(
55
+ idx_t n,
56
+ const uint8_t* codes,
57
+ const idx_t* list_nos,
58
+ float* x) const override;
59
+
53
60
  InvertedListScanner* get_InvertedListScanner(
54
61
  bool store_pairs,
55
62
  const IDSelector* sel,
@@ -82,7 +89,8 @@ struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
82
89
  size_t nlist,
83
90
  const std::vector<size_t>& nbits,
84
91
  MetricType metric = METRIC_L2,
85
- Search_type_t search_type = AdditiveQuantizer::ST_decompress);
92
+ Search_type_t search_type = AdditiveQuantizer::ST_decompress,
93
+ bool own_invlists = true);
86
94
 
87
95
  IndexIVFResidualQuantizer(
88
96
  Index* quantizer,
@@ -91,7 +99,8 @@ struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
91
99
  size_t M, /* number of subquantizers */
92
100
  size_t nbits, /* number of bit per subvector index */
93
101
  MetricType metric = METRIC_L2,
94
- Search_type_t search_type = AdditiveQuantizer::ST_decompress);
102
+ Search_type_t search_type = AdditiveQuantizer::ST_decompress,
103
+ bool own_invlists = true);
95
104
 
96
105
  IndexIVFResidualQuantizer();
97
106
 
@@ -118,7 +127,8 @@ struct IndexIVFLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
118
127
  size_t M, /* number of subquantizers */
119
128
  size_t nbits, /* number of bit per subvector index */
120
129
  MetricType metric = METRIC_L2,
121
- Search_type_t search_type = AdditiveQuantizer::ST_decompress);
130
+ Search_type_t search_type = AdditiveQuantizer::ST_decompress,
131
+ bool own_invlists = true);
122
132
 
123
133
  IndexIVFLocalSearchQuantizer();
124
134
 
@@ -147,7 +157,8 @@ struct IndexIVFProductResidualQuantizer : IndexIVFAdditiveQuantizer {
147
157
  size_t Msub,
148
158
  size_t nbits,
149
159
  MetricType metric = METRIC_L2,
150
- Search_type_t search_type = AdditiveQuantizer::ST_decompress);
160
+ Search_type_t search_type = AdditiveQuantizer::ST_decompress,
161
+ bool own_invlists = true);
151
162
 
152
163
  IndexIVFProductResidualQuantizer();
153
164
 
@@ -176,7 +187,8 @@ struct IndexIVFProductLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
176
187
  size_t Msub,
177
188
  size_t nbits,
178
189
  MetricType metric = METRIC_L2,
179
- Search_type_t search_type = AdditiveQuantizer::ST_decompress);
190
+ Search_type_t search_type = AdditiveQuantizer::ST_decompress,
191
+ bool own_invlists = true);
180
192
 
181
193
  IndexIVFProductLocalSearchQuantizer();
182
194