faiss 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -34,6 +34,10 @@
34
34
  #include <faiss/IndexScalarQuantizer.h>
35
35
  #include <faiss/IndexHNSW.h>
36
36
  #include <faiss/IndexLattice.h>
37
+ #include <faiss/IndexPQFastScan.h>
38
+ #include <faiss/IndexIVFPQFastScan.h>
39
+ #include <faiss/IndexRefine.h>
40
+
37
41
 
38
42
  #include <faiss/IndexBinaryFlat.h>
39
43
  #include <faiss/IndexBinaryHNSW.h>
@@ -62,29 +66,90 @@ struct VTChain {
62
66
  /// what kind of training does this coarse quantizer require?
63
67
  char get_trains_alone(const Index *coarse_quantizer) {
64
68
  return
69
+ dynamic_cast<const IndexFlat*>(coarse_quantizer) ? 0 :
70
+ // multi index just needs to be quantized
65
71
  dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ? 1 :
66
72
  dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
67
- 0;
73
+ 2; // for complicated indexes, we assume they can't be used as a kmeans index
68
74
  }
69
75
 
/// Tell whether `s` ends with `suffix`.
///
/// An empty suffix matches every string (including the empty one); a suffix
/// longer than `s` never matches.
bool str_ends_with(const std::string& s, const std::string& suffix)
{
    // checking the lengths first keeps the offset below from underflowing
    if (suffix.size() > s.size()) {
        return false;
    }
    // compare only the tail of s instead of searching the whole string
    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}
80
+
81
+ // check if ends with suffix followed by digits
82
+ bool str_ends_with_digits(const std::string& s, const std::string& suffix)
83
+ {
84
+ int i;
85
+ for(i = s.length() - 1; i >= 0; i--) {
86
+ if (!isdigit(s[i])) break;
87
+ }
88
+ return str_ends_with(s.substr(0, i + 1), suffix);
89
+ }
90
+
91
+ void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
92
+ int st = 0;
93
+ for (int i = 0; i < s.length(); i++) {
94
+ if (s[i] == '(') {
95
+ if (st == 0) {
96
+ i0 = i;
97
+ }
98
+ st++;
99
+ }
100
+
101
+ if (s[i] == ')') {
102
+ st--;
103
+ if (st == 0) {
104
+ i1 = i;
105
+ return;
106
+ }
107
+ if (st < 0) {
108
+ FAISS_THROW_FMT("factory string %s: unbalanced parentheses", s.c_str());
109
+ }
110
+ }
111
+
112
+ }
113
+ FAISS_THROW_FMT("factory string %s: unbalanced parentheses st=%d", s.c_str(), st);
70
114
 
71
115
  }
72
116
 
117
+ } // anonymous namespace
118
+
73
119
  Index *index_factory (int d, const char *description_in, MetricType metric)
74
120
  {
75
121
  FAISS_THROW_IF_NOT(metric == METRIC_L2 ||
76
122
  metric == METRIC_INNER_PRODUCT);
77
123
  VTChain vts;
78
124
  Index *coarse_quantizer = nullptr;
125
+ std::string parenthesis_ivf, parenthesis_refine;
79
126
  Index *index = nullptr;
80
127
  bool add_idmap = false;
81
- bool make_IndexRefineFlat = false;
128
+ int d_in = d;
82
129
 
83
130
  ScopeDeleter1<Index> del_coarse_quantizer, del_index;
84
131
 
85
132
  std::string description(description_in);
86
133
  char *ptr;
87
134
 
135
+ // handle indexes in parentheses
136
+ while (description.find('(') != std::string::npos) {
137
+ // then we make a sub-index and remove the () from the description
138
+ int i0, i1;
139
+ find_matching_parentheses(description, i0, i1);
140
+
141
+ std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1);
142
+
143
+ if (str_ends_with_digits(description.substr(0, i0), "IVF")) {
144
+ parenthesis_ivf = sub_description;
145
+ } else if (str_ends_with(description.substr(0, i0), "Refine")) {
146
+ parenthesis_refine = sub_description;
147
+ } else {
148
+ FAISS_THROW_MSG("don't know what to do with parenthesis index");
149
+ }
150
+ description = description.erase(i0, i1 - i0 + 1);
151
+ }
152
+
88
153
  int64_t ncentroids = -1;
89
154
  bool use_2layer = false;
90
155
  int hnsw_M = -1;
@@ -95,6 +160,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
95
160
  int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
96
161
  std::string stok(tok);
97
162
  nbit = 8;
163
+ int bbs = -1;
164
+ char c;
98
165
 
99
166
  // to avoid mem leaks with exceptions:
100
167
  // do all tests before any instanciation
@@ -140,15 +207,20 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
140
207
  // coarse quantizers
141
208
  } else if (!coarse_quantizer &&
142
209
  sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
143
- coarse_quantizer_1 = new IndexHNSWFlat (d, M);
210
+ coarse_quantizer_1 = new IndexHNSWFlat (d, M, metric);
144
211
 
145
212
  } else if (!coarse_quantizer &&
146
213
  sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) {
147
- if (metric == METRIC_L2) {
214
+ if (!parenthesis_ivf.empty()) {
215
+ coarse_quantizer_1 =
216
+ index_factory(d, parenthesis_ivf.c_str(), metric);
217
+
218
+ } else if (metric == METRIC_L2) {
148
219
  coarse_quantizer_1 = new IndexFlatL2 (d);
149
220
  } else {
150
221
  coarse_quantizer_1 = new IndexFlatIP (d);
151
222
  }
223
+
152
224
  } else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) {
153
225
  FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
154
226
  "MultiIndex not implemented for inner prod search");
@@ -228,6 +300,32 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
228
300
  del_coarse_quantizer.release ();
229
301
  index_ivf->own_fields = true;
230
302
  index_1 = index_ivf;
303
+ } else if (!index && (
304
+ sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
305
+ (sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
306
+ (sscanf (tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
307
+ if (bbs == -1) {
308
+ bbs = 32;
309
+ }
310
+ bool by_residual = str_ends_with(stok, "fsr");
311
+ if (coarse_quantizer) {
312
+ IndexIVFPQFastScan *index_ivf = new IndexIVFPQFastScan(
313
+ coarse_quantizer, d, ncentroids, M, 4, metric, bbs
314
+ );
315
+ index_ivf->quantizer_trains_alone =
316
+ get_trains_alone (coarse_quantizer);
317
+ index_ivf->metric_type = metric;
318
+ index_ivf->by_residual = by_residual;
319
+ index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
320
+ del_coarse_quantizer.release ();
321
+ index_ivf->own_fields = true;
322
+ index_1 = index_ivf;
323
+ } else {
324
+ IndexPQFastScan *index_pq = new IndexPQFastScan (
325
+ d, M, 4, metric, bbs
326
+ );
327
+ index_1 = index_pq;
328
+ }
231
329
  } else if (!index && (sscanf (tok, "PQ%dx%d", &M, &nbit) == 2 ||
232
330
  sscanf (tok, "PQ%d", &M) == 1 ||
233
331
  sscanf (tok, "PQ%dnp", &M) == 1)) {
@@ -299,7 +397,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
299
397
  FAISS_THROW_IF_NOT(!coarse_quantizer);
300
398
  index_1 = new IndexLattice(d, M, nbit, r2);
301
399
  } else if (stok == "RFlat") {
302
- make_IndexRefineFlat = true;
400
+ parenthesis_refine = "Flat";
401
+ } else if (stok == "Refine") {
402
+ FAISS_THROW_IF_NOT_MSG(
403
+ !parenthesis_refine.empty(),
404
+ "Refine index should be provided in parentheses"
405
+ );
303
406
  } else {
304
407
  FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n",
305
408
  tok, description_in);
@@ -356,8 +459,10 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
356
459
  index = index_pt;
357
460
  }
358
461
 
359
- if (make_IndexRefineFlat) {
360
- IndexRefineFlat *index_rf = new IndexRefineFlat (index);
462
+ if (!parenthesis_refine.empty()) {
463
+ Index *refine_index = index_factory(d_in, parenthesis_refine.c_str(), metric);
464
+ IndexRefine *index_rf = new IndexRefine(index, refine_index);
465
+ index_rf->own_refine_index = true;
361
466
  index_rf->own_fields = true;
362
467
  index = index_rf;
363
468
  }
@@ -51,7 +51,7 @@ const int IO_FLAG_READ_ONLY = 2;
51
51
  const int IO_FLAG_ONDISK_SAME_DIR = 4;
52
52
  // don't load IVF data to RAM, only list sizes
53
53
  const int IO_FLAG_SKIP_IVF_DATA = 8;
54
- // try to memmap data (useful for OnDiskInvertedLists)
54
+ // try to memmap data (useful to load an ArrayInvertedLists as an OnDiskInvertedLists)
55
55
  const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
56
56
 
57
57
 
@@ -76,53 +76,6 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
76
76
  InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
77
77
 
78
78
 
79
- #ifndef _MSC_VER
80
- /** Callbacks to handle other types of InvertedList objects.
81
- *
82
- * The callbacks should be registered with add_callback before calling
83
- * read_index or read_InvertedLists. The callbacks for
84
- * OnDiskInvertedLists are registrered by default. The invlist type is
85
- * identified by:
86
- *
87
- * - the key (a fourcc) at read time
88
- * - the class name (as given by typeid.name) at write time
89
- */
90
- struct InvertedListsIOHook {
91
- const std::string key; ///< string version of the fourcc
92
- const std::string classname; ///< typeid.name
93
-
94
- InvertedListsIOHook(const std::string & key, const std::string & classname);
95
-
96
- /// write the index to the IOWriter (including the fourcc)
97
- virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
98
-
99
- /// called when the fourcc matches this class's fourcc
100
- virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
101
-
102
- /** read from a ArrayInvertedLists into this invertedlist type.
103
- * For this to work, the callback has to be enabled and the io_flag has to be set to
104
- * IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
105
- */
106
- virtual InvertedLists * read_ArrayInvertedLists(
107
- IOReader *f, int io_flags,
108
- size_t nlist, size_t code_size,
109
- const std::vector<size_t> &sizes) const = 0;
110
-
111
- virtual ~InvertedListsIOHook() {}
112
-
113
- /**************************** Manage the set of callbacks ******/
114
-
115
- // transfers ownership
116
- static void add_callback(InvertedListsIOHook *);
117
- static void print_callbacks();
118
- static InvertedListsIOHook* lookup(int h);
119
- static InvertedListsIOHook* lookup_classname(const std::string & classname);
120
-
121
- };
122
-
123
- #endif // !_MSC_VER
124
-
125
-
126
79
  } // namespace faiss
127
80
 
128
81
 
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/invlists/BlockInvertedLists.h>
9
+
10
+ #include <faiss/impl/FaissAssert.h>
11
+
12
+ #include <faiss/impl/io.h>
13
+ #include <faiss/impl/io_macros.h>
14
+
15
+
16
+ namespace faiss {
17
+
18
+ BlockInvertedLists::BlockInvertedLists (
19
+ size_t nlist, size_t n_per_block,
20
+ size_t block_size):
21
+ InvertedLists (nlist, InvertedLists::INVALID_CODE_SIZE),
22
+ n_per_block(n_per_block), block_size(block_size)
23
+ {
24
+ ids.resize (nlist);
25
+ codes.resize (nlist);
26
+ }
27
+
28
+ BlockInvertedLists::BlockInvertedLists ():
29
+ InvertedLists (0, InvertedLists::INVALID_CODE_SIZE),
30
+ n_per_block(0), block_size(0)
31
+ {}
32
+
33
+
34
+ size_t BlockInvertedLists::add_entries (
35
+ size_t list_no, size_t n_entry,
36
+ const idx_t* ids_in, const uint8_t *code)
37
+ {
38
+ if (n_entry == 0) return 0;
39
+ FAISS_THROW_IF_NOT (list_no < nlist);
40
+ size_t o = ids [list_no].size();
41
+ FAISS_THROW_IF_NOT (o == 0); // not clear how we should handle subsequent adds
42
+ ids [list_no].resize (o + n_entry);
43
+ memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
44
+
45
+ // copy whole blocks
46
+ size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
47
+ codes [list_no].resize (n_block * block_size);
48
+ memcpy (&codes[list_no][o * code_size], code, n_block * block_size);
49
+ return o;
50
+ }
51
+
52
+ size_t BlockInvertedLists::list_size(size_t list_no) const
53
+ {
54
+ assert (list_no < nlist);
55
+ return ids[list_no].size();
56
+ }
57
+
58
+ const uint8_t * BlockInvertedLists::get_codes (size_t list_no) const
59
+ {
60
+ assert (list_no < nlist);
61
+ return codes[list_no].get();
62
+ }
63
+
64
+ const InvertedLists::idx_t * BlockInvertedLists::get_ids (size_t list_no) const
65
+ {
66
+ assert (list_no < nlist);
67
+ return ids[list_no].data();
68
+ }
69
+
70
+ void BlockInvertedLists::resize (size_t list_no, size_t new_size)
71
+ {
72
+ ids[list_no].resize (new_size);
73
+ size_t prev_nbytes = codes[list_no].size();
74
+ size_t n_block = (new_size + n_per_block - 1) / n_per_block;
75
+ size_t new_nbytes = n_block * block_size;
76
+ codes[list_no].resize (new_nbytes);
77
+ if (prev_nbytes < new_nbytes) {
78
+ // set new elements to 0
79
+ memset(
80
+ codes[list_no].data() + prev_nbytes, 0,
81
+ new_nbytes - prev_nbytes
82
+ );
83
+ }
84
+ }
85
+
86
+ void BlockInvertedLists::update_entries (
87
+ size_t , size_t , size_t ,
88
+ const idx_t *, const uint8_t *)
89
+ {
90
+ FAISS_THROW_MSG("not impemented");
91
+ /*
92
+ assert (list_no < nlist);
93
+ assert (n_entry + offset <= ids[list_no].size());
94
+ memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
95
+ memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
96
+ */
97
+ }
98
+
99
+
100
+ BlockInvertedLists::~BlockInvertedLists ()
101
+ {}
102
+
103
+ /**************************************************
104
+ * IO hook implementation
105
+ **************************************************/
106
+
107
+ BlockInvertedListsIOHook::BlockInvertedListsIOHook():
108
+ InvertedListsIOHook("ilbl", typeid(BlockInvertedLists).name())
109
+ {}
110
+
111
+
112
+ void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) const
113
+ {
114
+ uint32_t h = fourcc ("ilbl");
115
+ WRITE1 (h);
116
+ const BlockInvertedLists *il =
117
+ dynamic_cast<const BlockInvertedLists*> (ils_in);
118
+ WRITE1 (il->nlist);
119
+ WRITE1 (il->code_size);
120
+ WRITE1 (il->n_per_block);
121
+ WRITE1 (il->block_size);
122
+
123
+ for (size_t i = 0; i < il->nlist; i++) {
124
+ WRITEVECTOR(il->ids[i]);
125
+ WRITEVECTOR(il->codes[i]);
126
+ }
127
+ }
128
+
129
+ InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */) const
130
+ {
131
+ BlockInvertedLists *il = new BlockInvertedLists();
132
+ READ1 (il->nlist);
133
+ READ1 (il->code_size);
134
+ READ1 (il->n_per_block);
135
+ READ1 (il->block_size);
136
+
137
+ il->ids.resize(il->nlist);
138
+ il->codes.resize(il->nlist);
139
+
140
+ for (size_t i = 0; i < il->nlist; i++) {
141
+ READVECTOR(il->ids[i]);
142
+ READVECTOR(il->codes[i]);
143
+ }
144
+
145
+ return il;
146
+ }
147
+
148
+
149
+
150
+
151
+ } // namespace faiss
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/invlists/InvertedLists.h>
12
+ #include <faiss/invlists/InvertedListsIOHook.h>
13
+ #include <faiss/utils/AlignedTable.h>
14
+ #include <faiss/index_io.h>
15
+
16
+ namespace faiss {
17
+
18
+ /** Inverted Lists that are organized by blocks.
19
+ *
20
+ * Different from the regular inverted lists, the codes are organized by blocks
21
+ * of size block_size bytes that reprsent a set of n_per_block. Therefore, code
22
+ * allocations are always rounded up to block_size bytes. The codes are also
23
+ * aligned on 32-byte boundaries for use with SIMD.
24
+ *
25
+ * To avoid misinterpretations, the code_size is set to (size_t)(-1), even if
26
+ * arguably the amount of memory consumed by code is block_size / n_per_block.
27
+ *
28
+ * The writing functions add_entries and update_entries operate on block-aligned
29
+ * data.
30
+ */
31
+ struct BlockInvertedLists: InvertedLists {
32
+
33
+ size_t n_per_block; // nb of vectors stored per block
34
+ size_t block_size; // nb bytes per block
35
+
36
+ std::vector<AlignedTable<uint8_t>> codes;
37
+ std::vector<std::vector<idx_t>> ids;
38
+
39
+
40
+ BlockInvertedLists (
41
+ size_t nlist, size_t vec_per_block,
42
+ size_t block_size
43
+ );
44
+
45
+ BlockInvertedLists();
46
+
47
+ size_t list_size(size_t list_no) const override;
48
+ const uint8_t * get_codes (size_t list_no) const override;
49
+ const idx_t * get_ids (size_t list_no) const override;
50
+
51
+ // works only on empty BlockInvertedLists
52
+ // the codes should be of size ceil(n_entry / n_per_block) * block_size
53
+ // and padded with 0s
54
+ size_t add_entries (
55
+ size_t list_no, size_t n_entry,
56
+ const idx_t* ids, const uint8_t *code) override;
57
+
58
+ /// not implemented
59
+ void update_entries (size_t list_no, size_t offset, size_t n_entry,
60
+ const idx_t *ids, const uint8_t *code) override;
61
+
62
+ // also pads new data with 0s
63
+ void resize (size_t list_no, size_t new_size) override;
64
+
65
+ ~BlockInvertedLists () override;
66
+
67
+ };
68
+
69
+ struct BlockInvertedListsIOHook : InvertedListsIOHook {
70
+ BlockInvertedListsIOHook();
71
+ void write(const InvertedLists *ils, IOWriter *f) const override;
72
+ InvertedLists * read(IOReader *f, int io_flags) const override;
73
+ };
74
+
75
+
76
+ } // namespace faiss