faiss 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/vendor/faiss/faiss/Clustering.cpp +32 -0
  6. data/vendor/faiss/faiss/Clustering.h +14 -0
  7. data/vendor/faiss/faiss/Index.h +1 -1
  8. data/vendor/faiss/faiss/Index2Layer.cpp +19 -92
  9. data/vendor/faiss/faiss/Index2Layer.h +2 -16
  10. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +407 -0
  11. data/vendor/faiss/faiss/{IndexResidual.h → IndexAdditiveQuantizer.h} +101 -58
  12. data/vendor/faiss/faiss/IndexFlat.cpp +22 -52
  13. data/vendor/faiss/faiss/IndexFlat.h +9 -15
  14. data/vendor/faiss/faiss/IndexFlatCodes.cpp +67 -0
  15. data/vendor/faiss/faiss/IndexFlatCodes.h +47 -0
  16. data/vendor/faiss/faiss/IndexIVF.cpp +79 -7
  17. data/vendor/faiss/faiss/IndexIVF.h +25 -7
  18. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +316 -0
  19. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +121 -0
  20. data/vendor/faiss/faiss/IndexIVFFlat.cpp +9 -12
  21. data/vendor/faiss/faiss/IndexIVFPQ.cpp +5 -4
  22. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
  23. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +60 -39
  24. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +21 -6
  25. data/vendor/faiss/faiss/IndexLSH.cpp +4 -30
  26. data/vendor/faiss/faiss/IndexLSH.h +2 -15
  27. data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -2
  28. data/vendor/faiss/faiss/IndexNSG.cpp +0 -2
  29. data/vendor/faiss/faiss/IndexPQ.cpp +2 -51
  30. data/vendor/faiss/faiss/IndexPQ.h +2 -17
  31. data/vendor/faiss/faiss/IndexRefine.cpp +28 -0
  32. data/vendor/faiss/faiss/IndexRefine.h +10 -0
  33. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -28
  34. data/vendor/faiss/faiss/IndexScalarQuantizer.h +2 -16
  35. data/vendor/faiss/faiss/VectorTransform.cpp +2 -1
  36. data/vendor/faiss/faiss/VectorTransform.h +3 -0
  37. data/vendor/faiss/faiss/clone_index.cpp +3 -2
  38. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +2 -2
  39. data/vendor/faiss/faiss/gpu/GpuIcmEncoder.h +60 -0
  40. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +257 -24
  41. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +69 -9
  42. data/vendor/faiss/faiss/impl/HNSW.cpp +10 -5
  43. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +393 -210
  44. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +100 -28
  45. data/vendor/faiss/faiss/impl/NSG.cpp +0 -3
  46. data/vendor/faiss/faiss/impl/NSG.h +1 -1
  47. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +357 -47
  48. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +65 -7
  49. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +12 -19
  50. data/vendor/faiss/faiss/impl/index_read.cpp +102 -19
  51. data/vendor/faiss/faiss/impl/index_write.cpp +66 -16
  52. data/vendor/faiss/faiss/impl/io.cpp +1 -1
  53. data/vendor/faiss/faiss/impl/io_macros.h +20 -0
  54. data/vendor/faiss/faiss/impl/kmeans1d.cpp +301 -0
  55. data/vendor/faiss/faiss/impl/kmeans1d.h +48 -0
  56. data/vendor/faiss/faiss/index_factory.cpp +585 -414
  57. data/vendor/faiss/faiss/index_factory.h +3 -0
  58. data/vendor/faiss/faiss/utils/distances.cpp +4 -2
  59. data/vendor/faiss/faiss/utils/distances.h +36 -3
  60. data/vendor/faiss/faiss/utils/distances_simd.cpp +50 -0
  61. data/vendor/faiss/faiss/utils/utils.h +1 -1
  62. metadata +12 -5
  63. data/vendor/faiss/faiss/IndexResidual.cpp +0 -291
@@ -19,17 +19,8 @@
19
19
 
20
20
  namespace faiss {
21
21
 
22
- IndexFlat::IndexFlat(idx_t d, MetricType metric) : Index(d, metric) {}
23
-
24
- void IndexFlat::add(idx_t n, const float* x) {
25
- xb.insert(xb.end(), x, x + n * d);
26
- ntotal += n;
27
- }
28
-
29
- void IndexFlat::reset() {
30
- xb.clear();
31
- ntotal = 0;
32
- }
22
+ IndexFlat::IndexFlat(idx_t d, MetricType metric)
23
+ : IndexFlatCodes(sizeof(float) * d, d, metric) {}
33
24
 
34
25
  void IndexFlat::search(
35
26
  idx_t n,
@@ -43,14 +34,14 @@ void IndexFlat::search(
43
34
 
44
35
  if (metric_type == METRIC_INNER_PRODUCT) {
45
36
  float_minheap_array_t res = {size_t(n), size_t(k), labels, distances};
46
- knn_inner_product(x, xb.data(), d, n, ntotal, &res);
37
+ knn_inner_product(x, get_xb(), d, n, ntotal, &res);
47
38
  } else if (metric_type == METRIC_L2) {
48
39
  float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
49
- knn_L2sqr(x, xb.data(), d, n, ntotal, &res);
40
+ knn_L2sqr(x, get_xb(), d, n, ntotal, &res);
50
41
  } else {
51
42
  float_maxheap_array_t res = {size_t(n), size_t(k), labels, distances};
52
43
  knn_extra_metrics(
53
- x, xb.data(), d, n, ntotal, metric_type, metric_arg, &res);
44
+ x, get_xb(), d, n, ntotal, metric_type, metric_arg, &res);
54
45
  }
55
46
  }
56
47
 
@@ -62,10 +53,10 @@ void IndexFlat::range_search(
62
53
  switch (metric_type) {
63
54
  case METRIC_INNER_PRODUCT:
64
55
  range_search_inner_product(
65
- x, xb.data(), d, n, ntotal, radius, result);
56
+ x, get_xb(), d, n, ntotal, radius, result);
66
57
  break;
67
58
  case METRIC_L2:
68
- range_search_L2sqr(x, xb.data(), d, n, ntotal, radius, result);
59
+ range_search_L2sqr(x, get_xb(), d, n, ntotal, radius, result);
69
60
  break;
70
61
  default:
71
62
  FAISS_THROW_MSG("metric type not supported");
@@ -80,37 +71,16 @@ void IndexFlat::compute_distance_subset(
80
71
  const idx_t* labels) const {
81
72
  switch (metric_type) {
82
73
  case METRIC_INNER_PRODUCT:
83
- fvec_inner_products_by_idx(
84
- distances, x, xb.data(), labels, d, n, k);
74
+ fvec_inner_products_by_idx(distances, x, get_xb(), labels, d, n, k);
85
75
  break;
86
76
  case METRIC_L2:
87
- fvec_L2sqr_by_idx(distances, x, xb.data(), labels, d, n, k);
77
+ fvec_L2sqr_by_idx(distances, x, get_xb(), labels, d, n, k);
88
78
  break;
89
79
  default:
90
80
  FAISS_THROW_MSG("metric type not supported");
91
81
  }
92
82
  }
93
83
 
94
- size_t IndexFlat::remove_ids(const IDSelector& sel) {
95
- idx_t j = 0;
96
- for (idx_t i = 0; i < ntotal; i++) {
97
- if (sel.is_member(i)) {
98
- // should be removed
99
- } else {
100
- if (i > j) {
101
- memmove(&xb[d * j], &xb[d * i], sizeof(xb[0]) * d);
102
- }
103
- j++;
104
- }
105
- }
106
- size_t nremove = ntotal - j;
107
- if (nremove > 0) {
108
- ntotal = j;
109
- xb.resize(ntotal * d);
110
- }
111
- return nremove;
112
- }
113
-
114
84
  namespace {
115
85
 
116
86
  struct FlatL2Dis : DistanceComputer {
@@ -133,7 +103,7 @@ struct FlatL2Dis : DistanceComputer {
133
103
  : d(storage.d),
134
104
  nb(storage.ntotal),
135
105
  q(q),
136
- b(storage.xb.data()),
106
+ b(storage.get_xb()),
137
107
  ndis(0) {}
138
108
 
139
109
  void set_query(const float* x) override {
@@ -161,7 +131,7 @@ struct FlatIPDis : DistanceComputer {
161
131
  : d(storage.d),
162
132
  nb(storage.ntotal),
163
133
  q(q),
164
- b(storage.xb.data()),
134
+ b(storage.get_xb()),
165
135
  ndis(0) {}
166
136
 
167
137
  void set_query(const float* x) override {
@@ -178,25 +148,24 @@ DistanceComputer* IndexFlat::get_distance_computer() const {
178
148
  return new FlatIPDis(*this);
179
149
  } else {
180
150
  return get_extra_distance_computer(
181
- d, metric_type, metric_arg, ntotal, xb.data());
151
+ d, metric_type, metric_arg, ntotal, get_xb());
182
152
  }
183
153
  }
184
154
 
185
155
  void IndexFlat::reconstruct(idx_t key, float* recons) const {
186
- memcpy(recons, &(xb[key * d]), sizeof(*recons) * d);
187
- }
188
-
189
- /* The standalone codec interface */
190
- size_t IndexFlat::sa_code_size() const {
191
- return sizeof(float) * d;
156
+ memcpy(recons, &(codes[key * code_size]), code_size);
192
157
  }
193
158
 
194
159
  void IndexFlat::sa_encode(idx_t n, const float* x, uint8_t* bytes) const {
195
- memcpy(bytes, x, sizeof(float) * d * n);
160
+ if (n > 0) {
161
+ memcpy(bytes, x, sizeof(float) * d * n);
162
+ }
196
163
  }
197
164
 
198
165
  void IndexFlat::sa_decode(idx_t n, const uint8_t* bytes, float* x) const {
199
- memcpy(x, bytes, sizeof(float) * d * n);
166
+ if (n > 0) {
167
+ memcpy(x, bytes, sizeof(float) * d * n);
168
+ }
200
169
  }
201
170
 
202
171
  /***************************************************
@@ -211,9 +180,9 @@ IndexFlat1D::IndexFlat1D(bool continuous_update)
211
180
  void IndexFlat1D::update_permutation() {
212
181
  perm.resize(ntotal);
213
182
  if (ntotal < 1000000) {
214
- fvec_argsort(ntotal, xb.data(), (size_t*)perm.data());
183
+ fvec_argsort(ntotal, get_xb(), (size_t*)perm.data());
215
184
  } else {
216
- fvec_argsort_parallel(ntotal, xb.data(), (size_t*)perm.data());
185
+ fvec_argsort_parallel(ntotal, get_xb(), (size_t*)perm.data());
217
186
  }
218
187
  }
219
188
 
@@ -238,6 +207,7 @@ void IndexFlat1D::search(
238
207
 
239
208
  FAISS_THROW_IF_NOT_MSG(
240
209
  perm.size() == ntotal, "Call update_permutation before search");
210
+ const float* xb = get_xb();
241
211
 
242
212
  #pragma omp parallel for
243
213
  for (idx_t i = 0; i < n; i++) {
@@ -12,21 +12,14 @@
12
12
 
13
13
  #include <vector>
14
14
 
15
- #include <faiss/Index.h>
15
+ #include <faiss/IndexFlatCodes.h>
16
16
 
17
17
  namespace faiss {
18
18
 
19
19
  /** Index that stores the full vectors and performs exhaustive search */
20
- struct IndexFlat : Index {
21
- /// database vectors, size ntotal * d
22
- std::vector<float> xb;
23
-
20
+ struct IndexFlat : IndexFlatCodes {
24
21
  explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2);
25
22
 
26
- void add(idx_t n, const float* x) override;
27
-
28
- void reset() override;
29
-
30
23
  void search(
31
24
  idx_t n,
32
25
  const float* x,
@@ -57,18 +50,19 @@ struct IndexFlat : Index {
57
50
  float* distances,
58
51
  const idx_t* labels) const;
59
52
 
60
- /** remove some ids. NB that Because of the structure of the
61
- * indexing structure, the semantics of this operation are
62
- * different from the usual ones: the new ids are shifted */
63
- size_t remove_ids(const IDSelector& sel) override;
53
+ // get pointer to the floating point data
54
+ float* get_xb() {
55
+ return (float*)codes.data();
56
+ }
57
+ const float* get_xb() const {
58
+ return (const float*)codes.data();
59
+ }
64
60
 
65
61
  IndexFlat() {}
66
62
 
67
63
  DistanceComputer* get_distance_computer() const override;
68
64
 
69
65
  /* The standalone codec interface (just memcopies in this case) */
70
- size_t sa_code_size() const override;
71
-
72
66
  void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
73
67
 
74
68
  void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/IndexFlatCodes.h>
9
+
10
+ #include <faiss/impl/AuxIndexStructures.h>
11
+ #include <faiss/impl/FaissAssert.h>
12
+
13
+ namespace faiss {
14
+
15
+ IndexFlatCodes::IndexFlatCodes(size_t code_size, idx_t d, MetricType metric)
16
+ : Index(d, metric), code_size(code_size) {}
17
+
18
+ IndexFlatCodes::IndexFlatCodes() : code_size(0) {}
19
+
20
+ void IndexFlatCodes::add(idx_t n, const float* x) {
21
+ FAISS_THROW_IF_NOT(is_trained);
22
+ codes.resize((ntotal + n) * code_size);
23
+ sa_encode(n, x, &codes[ntotal * code_size]);
24
+ ntotal += n;
25
+ }
26
+
27
+ void IndexFlatCodes::reset() {
28
+ codes.clear();
29
+ ntotal = 0;
30
+ }
31
+
32
+ size_t IndexFlatCodes::sa_code_size() const {
33
+ return code_size;
34
+ }
35
+
36
+ size_t IndexFlatCodes::remove_ids(const IDSelector& sel) {
37
+ idx_t j = 0;
38
+ for (idx_t i = 0; i < ntotal; i++) {
39
+ if (sel.is_member(i)) {
40
+ // should be removed
41
+ } else {
42
+ if (i > j) {
43
+ memmove(&codes[code_size * j],
44
+ &codes[code_size * i],
45
+ code_size);
46
+ }
47
+ j++;
48
+ }
49
+ }
50
+ size_t nremove = ntotal - j;
51
+ if (nremove > 0) {
52
+ ntotal = j;
53
+ codes.resize(ntotal * code_size);
54
+ }
55
+ return nremove;
56
+ }
57
+
58
+ void IndexFlatCodes::reconstruct_n(idx_t i0, idx_t ni, float* recons) const {
59
+ FAISS_THROW_IF_NOT(ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));
60
+ sa_decode(ni, codes.data() + i0 * code_size, recons);
61
+ }
62
+
63
+ void IndexFlatCodes::reconstruct(idx_t key, float* recons) const {
64
+ reconstruct_n(key, 1, recons);
65
+ }
66
+
67
+ } // namespace faiss
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ // -*- c++ -*-
9
+
10
+ #pragma once
11
+
12
+ #include <faiss/Index.h>
13
+ #include <vector>
14
+
15
+ namespace faiss {
16
+
17
+ /** Index that encodes all vectors as fixed-size codes (size code_size). Storage
18
+ * is in the codes vector */
19
+ struct IndexFlatCodes : Index {
20
+ size_t code_size;
21
+
22
+ /// encoded dataset, size ntotal * code_size
23
+ std::vector<uint8_t> codes;
24
+
25
+ IndexFlatCodes();
26
+
27
+ IndexFlatCodes(size_t code_size, idx_t d, MetricType metric = METRIC_L2);
28
+
29
+ /// default add uses sa_encode
30
+ void add(idx_t n, const float* x) override;
31
+
32
+ void reset() override;
33
+
34
+ /// reconstruction using the codec interface
35
+ void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
36
+
37
+ void reconstruct(idx_t key, float* recons) const override;
38
+
39
+ size_t sa_code_size() const override;
40
+
41
+ /** remove some ids. NB: because of the structure of the
42
+ * indexing structure, the semantics of this operation are
43
+ * different from the usual ones: the new ids are shifted */
44
+ size_t remove_ids(const IDSelector& sel) override;
45
+ };
46
+
47
+ } // namespace faiss
@@ -107,8 +107,15 @@ void Level1Quantizer::train_q1(
107
107
  } else {
108
108
  clus.train(n, x, *clustering_index);
109
109
  }
110
- if (verbose)
110
+ if (verbose) {
111
111
  printf("Adding centroids to quantizer\n");
112
+ }
113
+ if (!quantizer->is_trained) {
114
+ if (verbose) {
115
+ printf("But training it first on centroids table...\n");
116
+ }
117
+ quantizer->train(nlist, clus.centroids.data());
118
+ }
112
119
  quantizer->add(nlist, clus.centroids.data());
113
120
  }
114
121
  }
@@ -190,6 +197,20 @@ void IndexIVF::add_with_ids(idx_t n, const float* x, const idx_t* xids) {
190
197
  add_core(n, x, xids, coarse_idx.get());
191
198
  }
192
199
 
200
+ void IndexIVF::add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids) {
201
+ size_t coarse_size = coarse_code_size();
202
+ DirectMapAdd dm_adder(direct_map, n, xids);
203
+
204
+ for (idx_t i = 0; i < n; i++) {
205
+ const uint8_t* code = codes + (code_size + coarse_size) * i;
206
+ idx_t list_no = decode_listno(code);
207
+ idx_t id = xids ? xids[i] : ntotal + i;
208
+ size_t ofs = invlists->add_entry(list_no, id, code + coarse_size);
209
+ dm_adder.add(i, list_no, ofs);
210
+ }
211
+ ntotal += n;
212
+ }
213
+
193
214
  void IndexIVF::add_core(
194
215
  idx_t n,
195
216
  const float* x,
@@ -1068,6 +1089,10 @@ IndexIVF::~IndexIVF() {
1068
1089
  }
1069
1090
  }
1070
1091
 
1092
+ /*************************************************************************
1093
+ * IndexIVFStats
1094
+ *************************************************************************/
1095
+
1071
1096
  void IndexIVFStats::reset() {
1072
1097
  memset((void*)this, 0, sizeof(*this));
1073
1098
  }
@@ -1083,13 +1108,60 @@ void IndexIVFStats::add(const IndexIVFStats& other) {
1083
1108
 
1084
1109
  IndexIVFStats indexIVF_stats;
1085
1110
 
1111
+ /*************************************************************************
1112
+ * InvertedListScanner
1113
+ *************************************************************************/
1114
+
1115
+ size_t InvertedListScanner::scan_codes(
1116
+ size_t list_size,
1117
+ const uint8_t* codes,
1118
+ const idx_t* ids,
1119
+ float* simi,
1120
+ idx_t* idxi,
1121
+ size_t k) const {
1122
+ size_t nup = 0;
1123
+
1124
+ if (!keep_max) {
1125
+ for (size_t j = 0; j < list_size; j++) {
1126
+ float dis = distance_to_code(codes);
1127
+ if (dis < simi[0]) {
1128
+ int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
1129
+ maxheap_replace_top(k, simi, idxi, dis, id);
1130
+ nup++;
1131
+ }
1132
+ codes += code_size;
1133
+ }
1134
+ } else {
1135
+ for (size_t j = 0; j < list_size; j++) {
1136
+ float dis = distance_to_code(codes);
1137
+ if (dis > simi[0]) {
1138
+ int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
1139
+ minheap_replace_top(k, simi, idxi, dis, id);
1140
+ nup++;
1141
+ }
1142
+ codes += code_size;
1143
+ }
1144
+ }
1145
+ return nup;
1146
+ }
1147
+
1086
1148
  void InvertedListScanner::scan_codes_range(
1087
- size_t,
1088
- const uint8_t*,
1089
- const idx_t*,
1090
- float,
1091
- RangeQueryResult&) const {
1092
- FAISS_THROW_MSG("scan_codes_range not implemented");
1149
+ size_t list_size,
1150
+ const uint8_t* codes,
1151
+ const idx_t* ids,
1152
+ float radius,
1153
+ RangeQueryResult& res) const {
1154
+ for (size_t j = 0; j < list_size; j++) {
1155
+ float dis = distance_to_code(codes);
1156
+ bool keep = !keep_max
1157
+ ? dis < radius
1158
+ : dis > radius; // TODO templatize to remove this test
1159
+ if (keep) {
1160
+ int64_t id = store_pairs ? lo_build(list_no, j) : ids[j];
1161
+ res.add(dis, id);
1162
+ }
1163
+ codes += code_size;
1164
+ }
1093
1165
  }
1094
1166
 
1095
1167
  } // namespace faiss
@@ -38,7 +38,7 @@ struct Level1Quantizer {
38
38
  * = 2: kmeans training on a flat index + add the centroids to the quantizer
39
39
  */
40
40
  char quantizer_trains_alone;
41
- bool own_fields; ///< whether object owns the quantizer
41
+ bool own_fields; ///< whether object owns the quantizer (false by default)
42
42
 
43
43
  ClusteringParameters cp; ///< to override default clustering params
44
44
  Index* clustering_index; ///< to override index used during clustering
@@ -121,8 +121,7 @@ struct IndexIVF : Index, Level1Quantizer {
121
121
 
122
122
  /** The Inverted file takes a quantizer (an Index) on input,
123
123
  * which implements the function mapping a vector to a list
124
- * identifier. The pointer is borrowed: the quantizer should not
125
- * be deleted while the IndexIVF is in use.
124
+ * identifier.
126
125
  */
127
126
  IndexIVF(
128
127
  Index* quantizer,
@@ -171,6 +170,13 @@ struct IndexIVF : Index, Level1Quantizer {
171
170
  uint8_t* codes,
172
171
  bool include_listno = false) const = 0;
173
172
 
173
+ /** Add vectors that are computed with the standalone codec
174
+ *
175
+ * @param codes codes to add, size n * sa_code_size()
176
+ * @param xids corresponding ids, size n
177
+ */
178
+ void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
179
+
174
180
  /// Sub-classes that encode the residuals can train their encoders here
175
181
  /// does nothing by default
176
182
  virtual void train_residual(idx_t n, const float* x);
@@ -231,7 +237,10 @@ struct IndexIVF : Index, Level1Quantizer {
231
237
  const IVFSearchParameters* params = nullptr,
232
238
  IndexIVFStats* stats = nullptr) const;
233
239
 
234
- /// get a scanner for this index (store_pairs means ignore labels)
240
+ /** Get a scanner for this index (store_pairs means ignore labels)
241
+ *
242
+ * The default search implementation uses this to compute the distances
243
+ */
235
244
  virtual InvertedListScanner* get_InvertedListScanner(
236
245
  bool store_pairs = false) const;
237
246
 
@@ -351,6 +360,14 @@ struct RangeQueryResult;
351
360
  struct InvertedListScanner {
352
361
  using idx_t = Index::idx_t;
353
362
 
363
+ idx_t list_no = -1; ///< remember current list
364
+ bool keep_max = false; ///< keep maximum instead of minimum
365
+ /// store positions in invlists rather than labels
366
+ bool store_pairs = false;
367
+
368
+ /// used in default implementation of scan_codes
369
+ size_t code_size = 0;
370
+
354
371
  /// from now on we handle this query.
355
372
  virtual void set_query(const float* query_vector) = 0;
356
373
 
@@ -361,7 +378,8 @@ struct InvertedListScanner {
361
378
  virtual float distance_to_code(const uint8_t* code) const = 0;
362
379
 
363
380
  /** scan a set of codes, compute distances to current query and
364
- * update heap of results if necessary.
381
+ * update heap of results if necessary. Default implementation
382
+ * calls distance_to_code.
365
383
  *
366
384
  * @param n number of codes to scan
367
385
  * @param codes codes to scan (n * code_size)
@@ -377,7 +395,7 @@ struct InvertedListScanner {
377
395
  const idx_t* ids,
378
396
  float* distances,
379
397
  idx_t* labels,
380
- size_t k) const = 0;
398
+ size_t k) const;
381
399
 
382
400
  /** scan a set of codes, compute distances to current query and
383
401
  * update results if distances are below radius
@@ -396,7 +414,7 @@ struct InvertedListScanner {
396
414
  struct IndexIVFStats {
397
415
  size_t nq; // nb of queries run
398
416
  size_t nlist; // nb of inverted lists scanned
399
- size_t ndis; // nb of distancs computed
417
+ size_t ndis; // nb of distances computed
400
418
  size_t nheap_updates; // nb of times the heap was updated
401
419
  double quantization_time; // time spent quantizing vectors (in ms)
402
420
  double search_time; // time spent searching lists (in ms)