faiss 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -19,17 +19,8 @@
19
19
 
20
20
  namespace faiss {
21
21
 
22
- IndexFlat::IndexFlat(idx_t d, MetricType metric) : Index(d, metric) {}
23
-
24
- void IndexFlat::add(idx_t n, const float* x) {
25
- xb.insert(xb.end(), x, x + n * d);
26
- ntotal += n;
27
- }
28
-
29
- void IndexFlat::reset() {
30
- xb.clear();
31
- ntotal = 0;
32
- }
22
+ IndexFlat::IndexFlat(idx_t d, MetricType metric)
23
+ : IndexFlatCodes(sizeof(float) * d, d, metric) {}
33
24
 
34
25
  void IndexFlat::search(
35
26
  idx_t n,
@@ -43,14 +34,14 @@ void IndexFlat::search(
43
34
 
44
35
  if (metric_type == METRIC_INNER_PRODUCT) {
45
36
  float_minheap_array_t res = {size_t(n), size_t(k), labels, distances};
46
- knn_inner_product(x, xb.data(), d, n, ntotal, &res);
37
+ knn_inner_product(x, get_xb(), d, n, ntotal, &res);
47
38
  } else if (metric_type == METRIC_L2) {
48
39
  float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
49
- knn_L2sqr(x, xb.data(), d, n, ntotal, &res);
40
+ knn_L2sqr(x, get_xb(), d, n, ntotal, &res);
50
41
  } else {
51
42
  float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
52
43
  knn_extra_metrics(
53
- x, xb.data(), d, n, ntotal, metric_type, metric_arg, &res);
44
+ x, get_xb(), d, n, ntotal, metric_type, metric_arg, &res);
54
45
  }
55
46
  }
56
47
 
@@ -62,10 +53,10 @@ void IndexFlat::range_search(
62
53
  switch (metric_type) {
63
54
  case METRIC_INNER_PRODUCT:
64
55
  range_search_inner_product(
65
- x, xb.data(), d, n, ntotal, radius, result);
56
+ x, get_xb(), d, n, ntotal, radius, result);
66
57
  break;
67
58
  case METRIC_L2:
68
- range_search_L2sqr(x, xb.data(), d, n, ntotal, radius, result);
59
+ range_search_L2sqr(x, get_xb(), d, n, ntotal, radius, result);
69
60
  break;
70
61
  default:
71
62
  FAISS_THROW_MSG("metric type not supported");
@@ -80,37 +71,16 @@ void IndexFlat::compute_distance_subset(
80
71
  const idx_t* labels) const {
81
72
  switch (metric_type) {
82
73
  case METRIC_INNER_PRODUCT:
83
- fvec_inner_products_by_idx(
84
- distances, x, xb.data(), labels, d, n, k);
74
+ fvec_inner_products_by_idx(distances, x, get_xb(), labels, d, n, k);
85
75
  break;
86
76
  case METRIC_L2:
87
- fvec_L2sqr_by_idx(distances, x, xb.data(), labels, d, n, k);
77
+ fvec_L2sqr_by_idx(distances, x, get_xb(), labels, d, n, k);
88
78
  break;
89
79
  default:
90
80
  FAISS_THROW_MSG("metric type not supported");
91
81
  }
92
82
  }
93
83
 
94
- size_t IndexFlat::remove_ids(const IDSelector& sel) {
95
- idx_t j = 0;
96
- for (idx_t i = 0; i < ntotal; i++) {
97
- if (sel.is_member(i)) {
98
- // should be removed
99
- } else {
100
- if (i > j) {
101
- memmove(&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
102
- }
103
- j++;
104
- }
105
- }
106
- size_t nremove = ntotal - j;
107
- if (nremove > 0) {
108
- ntotal = j;
109
- xb.resize(ntotal * d);
110
- }
111
- return nremove;
112
- }
113
-
114
84
  namespace {
115
85
 
116
86
  struct FlatL2Dis : DistanceComputer {
@@ -133,7 +103,7 @@ struct FlatL2Dis : DistanceComputer {
133
103
  : d(storage.d),
134
104
  nb(storage.ntotal),
135
105
  q(q),
136
- b(storage.xb.data()),
106
+ b(storage.get_xb()),
137
107
  ndis(0) {}
138
108
 
139
109
  void set_query(const float* x) override {
@@ -161,7 +131,7 @@ struct FlatIPDis : DistanceComputer {
161
131
  : d(storage.d),
162
132
  nb(storage.ntotal),
163
133
  q(q),
164
- b(storage.xb.data()),
134
+ b(storage.get_xb()),
165
135
  ndis(0) {}
166
136
 
167
137
  void set_query(const float* x) override {
@@ -178,25 +148,24 @@ DistanceComputer* IndexFlat::get_distance_computer() const {
178
148
  return new FlatIPDis(*this);
179
149
  } else {
180
150
  return get_extra_distance_computer(
181
- d, metric_type, metric_arg, ntotal, xb.data());
151
+ d, metric_type, metric_arg, ntotal, get_xb());
182
152
  }
183
153
  }
184
154
 
185
155
  void IndexFlat::reconstruct(idx_t key, float* recons) const {
186
- memcpy(recons, &(xb[key * d]), sizeof(*recons) * d);
187
- }
188
-
189
- /* The standalone codec interface */
190
- size_t IndexFlat::sa_code_size() const {
191
- return sizeof(float) * d;
156
+ memcpy(recons, &(codes[key * code_size]), code_size);
192
157
  }
193
158
 
194
159
  void IndexFlat::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
195
- memcpy(bytes, x, sizeof(float) * d * n);
160
+ if (n > 0) {
161
+ memcpy(bytes, x, sizeof(float) * d * n);
162
+ }
196
163
  }
197
164
 
198
165
  void IndexFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
199
- memcpy(x, bytes, sizeof(float) * d * n);
166
+ if (n > 0) {
167
+ memcpy(x, bytes, sizeof(float) * d * n);
168
+ }
200
169
  }
201
170
 
202
171
  /***************************************************
@@ -211,9 +180,9 @@ IndexFlat1D::IndexFlat1D(bool continuous_update)
211
180
  void IndexFlat1D::update_permutation() {
212
181
  perm.resize(ntotal);
213
182
  if (ntotal < 1000000) {
214
- fvec_argsort(ntotal, xb.data(), (size_t*)perm.data());
183
+ fvec_argsort(ntotal, get_xb(), (size_t*)perm.data());
215
184
  } else {
216
- fvec_argsort_parallel(ntotal, xb.data(), (size_t*)perm.data());
185
+ fvec_argsort_parallel(ntotal, get_xb(), (size_t*)perm.data());
217
186
  }
218
187
  }
219
188
 
@@ -238,6 +207,7 @@ void IndexFlat1D::search(
238
207
 
239
208
  FAISS_THROW_IF_NOT_MSG(
240
209
  perm.size() == ntotal, "Call update_permutation before search");
210
+ const float* xb = get_xb();
241
211
 
242
212
  #pragma omp parallel for
243
213
  for (idx_t i = 0; i < n; i++) {
@@ -12,21 +12,14 @@
12
12
 
13
13
  #include <vector>
14
14
 
15
- #include <faiss/Index.h>
15
+ #include <faiss/IndexFlatCodes.h>
16
16
 
17
17
  namespace faiss {
18
18
 
19
19
  /** Index that stores the full vectors and performs exhaustive search */
20
- struct IndexFlat : Index {
21
- /// database vectors, size ntotal * d
22
- std::vector<float> xb;
23
-
20
+ struct IndexFlat : IndexFlatCodes {
24
21
  explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2);
25
22
 
26
- void add(idx_t n, const float* x) override;
27
-
28
- void reset() override;
29
-
30
23
  void search(
31
24
  idx_t n,
32
25
  const float* x,
@@ -57,18 +50,19 @@ struct IndexFlat : Index {
57
50
  float* distances,
58
51
  const idx_t* labels) const;
59
52
 
60
- /** remove some ids. NB that Because of the structure of the
61
- * indexing structure, the semantics of this operation are
62
- * different from the usual ones: the new ids are shifted */
63
- size_t remove_ids(const IDSelector& sel) override;
53
+ // get pointer to the floating point data
54
+ float* get_xb() {
55
+ return (float*)codes.data();
56
+ }
57
+ const float* get_xb() const {
58
+ return (const float*)codes.data();
59
+ }
64
60
 
65
61
  IndexFlat() {}
66
62
 
67
63
  DistanceComputer* get_distance_computer() const override;
68
64
 
69
65
  /* The standalone codec interface (just memcopies in this case) */
70
- size_t sa_code_size() const override;
71
-
72
66
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
73
67
 
74
68
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/IndexFlatCodes.h>
9
+
10
+ #include <faiss/impl/AuxIndexStructures.h>
11
+ #include <faiss/impl/FaissAssert.h>
12
+
13
+ namespace faiss {
14
+
15
+ IndexFlatCodes::IndexFlatCodes(size_t code_size, idx_t d, MetricType metric)
16
+ : Index(d, metric), code_size(code_size) {}
17
+
18
+ IndexFlatCodes::IndexFlatCodes() : code_size(0) {}
19
+
20
+ void IndexFlatCodes::add(idx_t n, const float* x) {
21
+ FAISS_THROW_IF_NOT(is_trained);
22
+ codes.resize((ntotal + n) * code_size);
23
+ sa_encode(n, x, &codes[ntotal * code_size]);
24
+ ntotal += n;
25
+ }
26
+
27
+ void IndexFlatCodes::reset() {
28
+ codes.clear();
29
+ ntotal = 0;
30
+ }
31
+
32
+ size_t IndexFlatCodes::sa_code_size() const {
33
+ return code_size;
34
+ }
35
+
36
+ size_t IndexFlatCodes::remove_ids(const IDSelector& sel) {
37
+ idx_t j = 0;
38
+ for (idx_t i = 0; i < ntotal; i++) {
39
+ if (sel.is_member(i)) {
40
+ // should be removed
41
+ } else {
42
+ if (i > j) {
43
+ memmove(&codes[code_size * j],
44
+ &codes[code_size * i],
45
+ code_size);
46
+ }
47
+ j++;
48
+ }
49
+ }
50
+ size_t nremove = ntotal - j;
51
+ if (nremove > 0) {
52
+ ntotal = j;
53
+ codes.resize(ntotal * code_size);
54
+ }
55
+ return nremove;
56
+ }
57
+
58
+ void IndexFlatCodes::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
59
+ FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
60
+ sa_decode(ni, codes.data() + i0 * code_size, recons);
61
+ }
62
+
63
+ void IndexFlatCodes::reconstruct(idx_t key, float* recons) const {
64
+ reconstruct_n(key, 1, recons);
65
+ }
66
+
67
+ } // namespace faiss
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <faiss/Index.h>
13
+ #include <vector>
14
+
15
+ namespace faiss {
16
+
17
+ /** Index that encodes all vectors as fixed-size codes (size code_size). Storage
18
+ * is in the codes vector */
19
+ struct IndexFlatCodes : Index {
20
+ size_t code_size;
21
+
22
+ /// encoded dataset, size ntotal * code_size
23
+ std::vector<uint8_t> codes;
24
+
25
+ IndexFlatCodes();
26
+
27
+ IndexFlatCodes(size_t code_size, idx_t d, MetricType metric = METRIC_L2);
28
+
29
+ /// default add uses sa_encode
30
+ void add(idx_t n, const float* x) override;
31
+
32
+ void reset() override;
33
+
34
+ /// reconstruction using the codec interface
35
+ void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
36
+
37
+ void reconstruct(idx_t key, float* recons) const override;
38
+
39
+ size_t sa_code_size() const override;
40
+
41
+ /** remove some ids. NB that because of the structure of the
42
+ * indexing structure, the semantics of this operation are
43
+ * different from the usual ones: the new ids are shifted */
44
+ size_t remove_ids(const IDSelector& sel) override;
45
+ };
46
+
47
+ } // namespace faiss
@@ -107,8 +107,15 @@ void Level1Quantizer::train_q1(
107
107
  } else {
108
108
  clus.train(n, x, *clustering_index);
109
109
  }
110
- if (verbose)
110
+ if (verbose) {
111
111
  printf("Adding centroids to quantizer\n");
112
+ }
113
+ if (!quantizer->is_trained) {
114
+ if (verbose) {
115
+ printf("But training it first on centroids table...\n");
116
+ }
117
+ quantizer->train(nlist, clus.centroids.data());
118
+ }
112
119
  quantizer->add(nlist, clus.centroids.data());
113
120
  }
114
121
  }
@@ -190,6 +197,20 @@ void IndexIVF::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
190
197
  add_core(n, x, xids, coarse_idx.get());
191
198
  }
192
199
 
200
+ void IndexIVF::add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids) {
201
+ size_t coarse_size = coarse_code_size();
202
+ DirectMapAdd dm_adder(direct_map, n, xids);
203
+
204
+ for (idx_t i = 0; i < n; i++) {
205
+ const uint8_t* code = codes + (code_size + coarse_size) * i;
206
+ idx_t list_no = decode_listno(code);
207
+ idx_t id = xids ? xids[i] : ntotal + i;
208
+ size_t ofs = invlists->add_entry(list_no, id, code + coarse_size);
209
+ dm_adder.add(i, list_no, ofs);
210
+ }
211
+ ntotal += n;
212
+ }
213
+
193
214
  void IndexIVF::add_core(
194
215
  idx_t n,
195
216
  const float* x,
@@ -1068,6 +1089,10 @@ IndexIVF::~IndexIVF() {
1068
1089
  }
1069
1090
  }
1070
1091
 
1092
+ /*************************************************************************
1093
+ * IndexIVFStats
1094
+ *************************************************************************/
1095
+
1071
1096
  void IndexIVFStats::reset() {
1072
1097
  memset((void*)this, 0, sizeof(*this));
1073
1098
  }
@@ -1083,13 +1108,60 @@ void IndexIVFStats::add(const IndexIVFStats& other) {
1083
1108
 
1084
1109
  IndexIVFStats indexIVF_stats;
1085
1110
 
1111
+ /*************************************************************************
1112
+ * InvertedListScanner
1113
+ *************************************************************************/
1114
+
1115
+ size_t InvertedListScanner::scan_codes(
1116
+ size_t list_size,
1117
+ const uint8_t* codes,
1118
+ const idx_t* ids,
1119
+ float* simi,
1120
+ idx_t* idxi,
1121
+ size_t k) const {
1122
+ size_t nup = 0;
1123
+
1124
+ if (!keep_max) {
1125
+ for (size_t j = 0; j < list_size; j++) {
1126
+ float dis = distance_to_code(codes);
1127
+ if (dis < simi[0]) {
1128
+ int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
1129
+ maxheap_replace_top(k, simi, idxi, dis, id);
1130
+ nup++;
1131
+ }
1132
+ codes += code_size;
1133
+ }
1134
+ } else {
1135
+ for (size_t j = 0; j < list_size; j++) {
1136
+ float dis = distance_to_code(codes);
1137
+ if (dis > simi[0]) {
1138
+ int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
1139
+ minheap_replace_top(k, simi, idxi, dis, id);
1140
+ nup++;
1141
+ }
1142
+ codes += code_size;
1143
+ }
1144
+ }
1145
+ return nup;
1146
+ }
1147
+
1086
1148
  void InvertedListScanner::scan_codes_range(
1087
- size_t,
1088
- const uint8_t*,
1089
- const idx_t*,
1090
- float,
1091
- RangeQueryResult&) const {
1092
- FAISS_THROW_MSG("scan_codes_range not implemented");
1149
+ size_t list_size,
1150
+ const uint8_t* codes,
1151
+ const idx_t* ids,
1152
+ float radius,
1153
+ RangeQueryResult& res) const {
1154
+ for (size_t j = 0; j < list_size; j++) {
1155
+ float dis = distance_to_code(codes);
1156
+ bool keep = !keep_max
1157
+ ? dis < radius
1158
+ : dis > radius; // TODO templatize to remove this test
1159
+ if (keep) {
1160
+ int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
1161
+ res.add(dis, id);
1162
+ }
1163
+ codes += code_size;
1164
+ }
1093
1165
  }
1094
1166
 
1095
1167
  } // namespace faiss
@@ -38,7 +38,7 @@ struct Level1Quantizer {
38
38
  * = 2: kmeans training on a flat index + add the centroids to the quantizer
39
39
  */
40
40
  char quantizer_trains_alone;
41
- bool own_fields; ///< whether object owns the quantizer
41
+ bool own_fields; ///< whether object owns the quantizer (false by default)
42
42
 
43
43
  ClusteringParameters cp; ///< to override default clustering params
44
44
  Index* clustering_index; ///< to override index used during clustering
@@ -121,8 +121,7 @@ struct IndexIVF : Index, Level1Quantizer {
121
121
 
122
122
  /** The Inverted file takes a quantizer (an Index) on input,
123
123
  * which implements the function mapping a vector to a list
124
- * identifier. The pointer is borrowed: the quantizer should not
125
- * be deleted while the IndexIVF is in use.
124
+ * identifier.
126
125
  */
127
126
  IndexIVF(
128
127
  Index* quantizer,
@@ -171,6 +170,13 @@ struct IndexIVF : Index, Level1Quantizer {
171
170
  uint8_t* codes,
172
171
  bool include_listno = false) const = 0;
173
172
 
173
+ /** Add vectors that are computed with the standalone codec
174
+ *
175
+ * @param codes codes to add size n * sa_code_size()
176
+ * @param xids corresponding ids, size n
177
+ */
178
+ void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
179
+
174
180
  /// Sub-classes that encode the residuals can train their encoders here
175
181
  /// does nothing by default
176
182
  virtual void train_residual(idx_t n, const float* x);
@@ -231,7 +237,10 @@ struct IndexIVF : Index, Level1Quantizer {
231
237
  const IVFSearchParameters* params = nullptr,
232
238
  IndexIVFStats* stats = nullptr) const;
233
239
 
234
- /// get a scanner for this index (store_pairs means ignore labels)
240
+ /** Get a scanner for this index (store_pairs means ignore labels)
241
+ *
242
+ * The default search implementation uses this to compute the distances
243
+ */
235
244
  virtual InvertedListScanner* get_InvertedListScanner(
236
245
  bool store_pairs = false) const;
237
246
 
@@ -351,6 +360,14 @@ struct RangeQueryResult;
351
360
  struct InvertedListScanner {
352
361
  using idx_t = Index::idx_t;
353
362
 
363
+ idx_t list_no = -1; ///< remember current list
364
+ bool keep_max = false; ///< keep maximum instead of minimum
365
+ /// store positions in invlists rather than labels
366
+ bool store_pairs = false;
367
+
368
+ /// used in default implementation of scan_codes
369
+ size_t code_size = 0;
370
+
354
371
  /// from now on we handle this query.
355
372
  virtual void set_query(const float* query_vector) = 0;
356
373
 
@@ -361,7 +378,8 @@ struct InvertedListScanner {
361
378
  virtual float distance_to_code(const uint8_t* code) const = 0;
362
379
 
363
380
  /** scan a set of codes, compute distances to current query and
364
- * update heap of results if necessary.
381
+ * update heap of results if necessary. Default implemetation
382
+ * calls distance_to_code.
365
383
  *
366
384
  * @param n number of codes to scan
367
385
  * @param codes codes to scan (n * code_size)
@@ -377,7 +395,7 @@ struct InvertedListScanner {
377
395
  const idx_t* ids,
378
396
  float* distances,
379
397
  idx_t* labels,
380
- size_t k) const = 0;
398
+ size_t k) const;
381
399
 
382
400
  /** scan a set of codes, compute distances to current query and
383
401
  * update results if distances are below radius
@@ -396,7 +414,7 @@ struct InvertedListScanner {
396
414
  struct IndexIVFStats {
397
415
  size_t nq; // nb of queries run
398
416
  size_t nlist; // nb of inverted lists scanned
399
- size_t ndis; // nb of distancs computed
417
+ size_t ndis; // nb of distances computed
400
418
  size_t nheap_updates; // nb of times the heap was updated
401
419
  double quantization_time; // time spent quantizing vectors (in ms)
402
420
  double search_time; // time spent searching lists (in ms)