faiss 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/ext/faiss/extconf.rb +1 -1
  6. data/lib/faiss/version.rb +1 -1
  7. data/vendor/faiss/faiss/AutoTune.cpp +36 -33
  8. data/vendor/faiss/faiss/AutoTune.h +6 -3
  9. data/vendor/faiss/faiss/Clustering.cpp +16 -12
  10. data/vendor/faiss/faiss/Index.cpp +3 -4
  11. data/vendor/faiss/faiss/Index.h +3 -3
  12. data/vendor/faiss/faiss/IndexBinary.cpp +3 -4
  13. data/vendor/faiss/faiss/IndexBinary.h +1 -1
  14. data/vendor/faiss/faiss/IndexBinaryHash.cpp +2 -12
  15. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -2
  16. data/vendor/faiss/faiss/IndexFlat.cpp +0 -148
  17. data/vendor/faiss/faiss/IndexFlat.h +0 -51
  18. data/vendor/faiss/faiss/IndexHNSW.cpp +4 -5
  19. data/vendor/faiss/faiss/IndexIVF.cpp +118 -31
  20. data/vendor/faiss/faiss/IndexIVF.h +22 -15
  21. data/vendor/faiss/faiss/IndexIVFFlat.cpp +3 -3
  22. data/vendor/faiss/faiss/IndexIVFFlat.h +2 -1
  23. data/vendor/faiss/faiss/IndexIVFPQ.cpp +39 -15
  24. data/vendor/faiss/faiss/IndexIVFPQ.h +25 -9
  25. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +1116 -0
  26. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +166 -0
  27. data/vendor/faiss/faiss/IndexIVFPQR.cpp +8 -9
  28. data/vendor/faiss/faiss/IndexIVFPQR.h +2 -1
  29. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -2
  30. data/vendor/faiss/faiss/IndexPQ.cpp +34 -18
  31. data/vendor/faiss/faiss/IndexPQFastScan.cpp +536 -0
  32. data/vendor/faiss/faiss/IndexPQFastScan.h +111 -0
  33. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -0
  34. data/vendor/faiss/faiss/IndexPreTransform.h +2 -0
  35. data/vendor/faiss/faiss/IndexRefine.cpp +256 -0
  36. data/vendor/faiss/faiss/IndexRefine.h +73 -0
  37. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +2 -2
  38. data/vendor/faiss/faiss/IndexScalarQuantizer.h +1 -1
  39. data/vendor/faiss/faiss/gpu/GpuDistance.h +1 -1
  40. data/vendor/faiss/faiss/gpu/GpuIndex.h +16 -9
  41. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +8 -1
  42. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -11
  43. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +19 -2
  44. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +28 -2
  45. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +24 -14
  46. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +29 -2
  47. data/vendor/faiss/faiss/gpu/GpuResources.h +4 -0
  48. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +60 -27
  49. data/vendor/faiss/faiss/gpu/StandardGpuResources.h +28 -6
  50. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +547 -0
  51. data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.h +51 -0
  52. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +3 -3
  53. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +3 -2
  54. data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +274 -0
  55. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +7 -2
  56. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +5 -1
  57. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +231 -0
  58. data/vendor/faiss/faiss/gpu/test/TestUtils.h +33 -0
  59. data/vendor/faiss/faiss/gpu/utils/StackDeviceMemory.cpp +1 -0
  60. data/vendor/faiss/faiss/gpu/utils/StaticUtils.h +6 -0
  61. data/vendor/faiss/faiss/gpu/utils/Timer.cpp +5 -6
  62. data/vendor/faiss/faiss/gpu/utils/Timer.h +2 -2
  63. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +5 -4
  64. data/vendor/faiss/faiss/impl/HNSW.cpp +2 -4
  65. data/vendor/faiss/faiss/impl/PolysemousTraining.h +4 -4
  66. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +22 -12
  67. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -0
  68. data/vendor/faiss/faiss/impl/ResultHandler.h +452 -0
  69. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +29 -19
  70. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +6 -0
  71. data/vendor/faiss/faiss/impl/index_read.cpp +64 -96
  72. data/vendor/faiss/faiss/impl/index_write.cpp +34 -25
  73. data/vendor/faiss/faiss/impl/io.cpp +33 -2
  74. data/vendor/faiss/faiss/impl/io.h +7 -2
  75. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -15
  76. data/vendor/faiss/faiss/impl/platform_macros.h +44 -0
  77. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +272 -0
  78. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +169 -0
  79. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +180 -0
  80. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +354 -0
  81. data/vendor/faiss/faiss/impl/simd_result_handlers.h +559 -0
  82. data/vendor/faiss/faiss/index_factory.cpp +112 -7
  83. data/vendor/faiss/faiss/index_io.h +1 -48
  84. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +151 -0
  85. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +76 -0
  86. data/vendor/faiss/faiss/{DirectMap.cpp → invlists/DirectMap.cpp} +1 -1
  87. data/vendor/faiss/faiss/{DirectMap.h → invlists/DirectMap.h} +1 -1
  88. data/vendor/faiss/faiss/{InvertedLists.cpp → invlists/InvertedLists.cpp} +72 -1
  89. data/vendor/faiss/faiss/{InvertedLists.h → invlists/InvertedLists.h} +32 -1
  90. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.cpp +107 -0
  91. data/vendor/faiss/faiss/invlists/InvertedListsIOHook.h +63 -0
  92. data/vendor/faiss/faiss/{OnDiskInvertedLists.cpp → invlists/OnDiskInvertedLists.cpp} +21 -6
  93. data/vendor/faiss/faiss/{OnDiskInvertedLists.h → invlists/OnDiskInvertedLists.h} +5 -2
  94. data/vendor/faiss/faiss/python/python_callbacks.h +8 -1
  95. data/vendor/faiss/faiss/utils/AlignedTable.h +141 -0
  96. data/vendor/faiss/faiss/utils/Heap.cpp +2 -4
  97. data/vendor/faiss/faiss/utils/Heap.h +61 -50
  98. data/vendor/faiss/faiss/utils/distances.cpp +164 -319
  99. data/vendor/faiss/faiss/utils/distances.h +28 -20
  100. data/vendor/faiss/faiss/utils/distances_simd.cpp +277 -49
  101. data/vendor/faiss/faiss/utils/extra_distances.cpp +1 -2
  102. data/vendor/faiss/faiss/utils/hamming-inl.h +4 -4
  103. data/vendor/faiss/faiss/utils/hamming.cpp +3 -6
  104. data/vendor/faiss/faiss/utils/hamming.h +2 -7
  105. data/vendor/faiss/faiss/utils/ordered_key_value.h +98 -0
  106. data/vendor/faiss/faiss/utils/partitioning.cpp +1256 -0
  107. data/vendor/faiss/faiss/utils/partitioning.h +69 -0
  108. data/vendor/faiss/faiss/utils/quantize_lut.cpp +277 -0
  109. data/vendor/faiss/faiss/utils/quantize_lut.h +80 -0
  110. data/vendor/faiss/faiss/utils/simdlib.h +31 -0
  111. data/vendor/faiss/faiss/utils/simdlib_avx2.h +461 -0
  112. data/vendor/faiss/faiss/utils/simdlib_emulated.h +589 -0
  113. metadata +43 -141
  114. data/vendor/faiss/benchs/bench_6bit_codec.cpp +0 -80
  115. data/vendor/faiss/c_api/AutoTune_c.cpp +0 -83
  116. data/vendor/faiss/c_api/AutoTune_c.h +0 -66
  117. data/vendor/faiss/c_api/Clustering_c.cpp +0 -145
  118. data/vendor/faiss/c_api/Clustering_c.h +0 -123
  119. data/vendor/faiss/c_api/IndexFlat_c.cpp +0 -140
  120. data/vendor/faiss/c_api/IndexFlat_c.h +0 -115
  121. data/vendor/faiss/c_api/IndexIVFFlat_c.cpp +0 -64
  122. data/vendor/faiss/c_api/IndexIVFFlat_c.h +0 -58
  123. data/vendor/faiss/c_api/IndexIVF_c.cpp +0 -99
  124. data/vendor/faiss/c_api/IndexIVF_c.h +0 -142
  125. data/vendor/faiss/c_api/IndexLSH_c.cpp +0 -37
  126. data/vendor/faiss/c_api/IndexLSH_c.h +0 -40
  127. data/vendor/faiss/c_api/IndexPreTransform_c.cpp +0 -21
  128. data/vendor/faiss/c_api/IndexPreTransform_c.h +0 -32
  129. data/vendor/faiss/c_api/IndexShards_c.cpp +0 -38
  130. data/vendor/faiss/c_api/IndexShards_c.h +0 -39
  131. data/vendor/faiss/c_api/Index_c.cpp +0 -105
  132. data/vendor/faiss/c_api/Index_c.h +0 -183
  133. data/vendor/faiss/c_api/MetaIndexes_c.cpp +0 -49
  134. data/vendor/faiss/c_api/MetaIndexes_c.h +0 -49
  135. data/vendor/faiss/c_api/clone_index_c.cpp +0 -23
  136. data/vendor/faiss/c_api/clone_index_c.h +0 -32
  137. data/vendor/faiss/c_api/error_c.h +0 -42
  138. data/vendor/faiss/c_api/error_impl.cpp +0 -27
  139. data/vendor/faiss/c_api/error_impl.h +0 -16
  140. data/vendor/faiss/c_api/faiss_c.h +0 -58
  141. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.cpp +0 -98
  142. data/vendor/faiss/c_api/gpu/GpuAutoTune_c.h +0 -56
  143. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.cpp +0 -52
  144. data/vendor/faiss/c_api/gpu/GpuClonerOptions_c.h +0 -68
  145. data/vendor/faiss/c_api/gpu/GpuIndex_c.cpp +0 -17
  146. data/vendor/faiss/c_api/gpu/GpuIndex_c.h +0 -30
  147. data/vendor/faiss/c_api/gpu/GpuIndicesOptions_c.h +0 -38
  148. data/vendor/faiss/c_api/gpu/GpuResources_c.cpp +0 -86
  149. data/vendor/faiss/c_api/gpu/GpuResources_c.h +0 -66
  150. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.cpp +0 -54
  151. data/vendor/faiss/c_api/gpu/StandardGpuResources_c.h +0 -53
  152. data/vendor/faiss/c_api/gpu/macros_impl.h +0 -42
  153. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.cpp +0 -220
  154. data/vendor/faiss/c_api/impl/AuxIndexStructures_c.h +0 -149
  155. data/vendor/faiss/c_api/index_factory_c.cpp +0 -26
  156. data/vendor/faiss/c_api/index_factory_c.h +0 -30
  157. data/vendor/faiss/c_api/index_io_c.cpp +0 -42
  158. data/vendor/faiss/c_api/index_io_c.h +0 -50
  159. data/vendor/faiss/c_api/macros_impl.h +0 -110
  160. data/vendor/faiss/demos/demo_imi_flat.cpp +0 -154
  161. data/vendor/faiss/demos/demo_imi_pq.cpp +0 -203
  162. data/vendor/faiss/demos/demo_ivfpq_indexing.cpp +0 -151
  163. data/vendor/faiss/demos/demo_sift1M.cpp +0 -252
  164. data/vendor/faiss/demos/demo_weighted_kmeans.cpp +0 -185
  165. data/vendor/faiss/misc/test_blas.cpp +0 -87
  166. data/vendor/faiss/tests/test_binary_flat.cpp +0 -62
  167. data/vendor/faiss/tests/test_dealloc_invlists.cpp +0 -188
  168. data/vendor/faiss/tests/test_ivfpq_codec.cpp +0 -70
  169. data/vendor/faiss/tests/test_ivfpq_indexing.cpp +0 -100
  170. data/vendor/faiss/tests/test_lowlevel_ivf.cpp +0 -573
  171. data/vendor/faiss/tests/test_merge.cpp +0 -260
  172. data/vendor/faiss/tests/test_omp_threads.cpp +0 -14
  173. data/vendor/faiss/tests/test_ondisk_ivf.cpp +0 -225
  174. data/vendor/faiss/tests/test_pairs_decoding.cpp +0 -193
  175. data/vendor/faiss/tests/test_params_override.cpp +0 -236
  176. data/vendor/faiss/tests/test_pq_encoding.cpp +0 -98
  177. data/vendor/faiss/tests/test_sliding_ivf.cpp +0 -246
  178. data/vendor/faiss/tests/test_threaded_index.cpp +0 -253
  179. data/vendor/faiss/tests/test_transfer_invlists.cpp +0 -159
  180. data/vendor/faiss/tutorial/cpp/1-Flat.cpp +0 -104
  181. data/vendor/faiss/tutorial/cpp/2-IVFFlat.cpp +0 -85
  182. data/vendor/faiss/tutorial/cpp/3-IVFPQ.cpp +0 -98
  183. data/vendor/faiss/tutorial/cpp/4-GPU.cpp +0 -122
  184. data/vendor/faiss/tutorial/cpp/5-Multiple-GPUs.cpp +0 -104
@@ -34,6 +34,10 @@
34
34
  #include <faiss/IndexScalarQuantizer.h>
35
35
  #include <faiss/IndexHNSW.h>
36
36
  #include <faiss/IndexLattice.h>
37
+ #include <faiss/IndexPQFastScan.h>
38
+ #include <faiss/IndexIVFPQFastScan.h>
39
+ #include <faiss/IndexRefine.h>
40
+
37
41
 
38
42
  #include <faiss/IndexBinaryFlat.h>
39
43
  #include <faiss/IndexBinaryHNSW.h>
@@ -62,29 +66,90 @@ struct VTChain {
62
66
  /// what kind of training does this coarse quantizer require?
63
67
  char get_trains_alone(const Index *coarse_quantizer) {
64
68
  return
69
+ dynamic_cast<const IndexFlat*>(coarse_quantizer) ? 0 :
70
+ // multi index just needs to be quantized
65
71
  dynamic_cast<const MultiIndexQuantizer*>(coarse_quantizer) ? 1 :
66
72
  dynamic_cast<const IndexHNSWFlat*>(coarse_quantizer) ? 2 :
67
- 0;
73
+ 2; // for complicated indexes, we assume they can't be used as a kmeans index
68
74
  }
69
75
 
76
+ bool str_ends_with(const std::string& s, const std::string& suffix)
77
+ {
78
+ return s.rfind(suffix) == std::abs(int(s.size()-suffix.size()));
79
+ }
80
+
81
+ // check if ends with suffix followed by digits
82
+ bool str_ends_with_digits(const std::string& s, const std::string& suffix)
83
+ {
84
+ int i;
85
+ for(i = s.length() - 1; i >= 0; i--) {
86
+ if (!isdigit(s[i])) break;
87
+ }
88
+ return str_ends_with(s.substr(0, i + 1), suffix);
89
+ }
90
+
91
+ void find_matching_parentheses(const std::string &s, int & i0, int & i1) {
92
+ int st = 0;
93
+ for (int i = 0; i < s.length(); i++) {
94
+ if (s[i] == '(') {
95
+ if (st == 0) {
96
+ i0 = i;
97
+ }
98
+ st++;
99
+ }
100
+
101
+ if (s[i] == ')') {
102
+ st--;
103
+ if (st == 0) {
104
+ i1 = i;
105
+ return;
106
+ }
107
+ if (st < 0) {
108
+ FAISS_THROW_FMT("factory string %s: unbalanced parentheses", s.c_str());
109
+ }
110
+ }
111
+
112
+ }
113
+ FAISS_THROW_FMT("factory string %s: unbalanced parentheses st=%d", s.c_str(), st);
70
114
 
71
115
  }
72
116
 
117
+ } // anonymous namespace
118
+
73
119
  Index *index_factory (int d, const char *description_in, MetricType metric)
74
120
  {
75
121
  FAISS_THROW_IF_NOT(metric == METRIC_L2 ||
76
122
  metric == METRIC_INNER_PRODUCT);
77
123
  VTChain vts;
78
124
  Index *coarse_quantizer = nullptr;
125
+ std::string parenthesis_ivf, parenthesis_refine;
79
126
  Index *index = nullptr;
80
127
  bool add_idmap = false;
81
- bool make_IndexRefineFlat = false;
128
+ int d_in = d;
82
129
 
83
130
  ScopeDeleter1<Index> del_coarse_quantizer, del_index;
84
131
 
85
132
  std::string description(description_in);
86
133
  char *ptr;
87
134
 
135
+ // handle indexes in parentheses
136
+ while (description.find('(') != std::string::npos) {
137
+ // then we make a sub-index and remove the () from the description
138
+ int i0, i1;
139
+ find_matching_parentheses(description, i0, i1);
140
+
141
+ std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1);
142
+
143
+ if (str_ends_with_digits(description.substr(0, i0), "IVF")) {
144
+ parenthesis_ivf = sub_description;
145
+ } else if (str_ends_with(description.substr(0, i0), "Refine")) {
146
+ parenthesis_refine = sub_description;
147
+ } else {
148
+ FAISS_THROW_MSG("don't know what to do with parenthesis index");
149
+ }
150
+ description = description.erase(i0, i1 - i0 + 1);
151
+ }
152
+
88
153
  int64_t ncentroids = -1;
89
154
  bool use_2layer = false;
90
155
  int hnsw_M = -1;
@@ -95,6 +160,8 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
95
160
  int d_out, opq_M, nbit, M, M2, pq_m, ncent, r2;
96
161
  std::string stok(tok);
97
162
  nbit = 8;
163
+ int bbs = -1;
164
+ char c;
98
165
 
99
166
  // to avoid mem leaks with exceptions:
100
167
  // do all tests before any instantiation
@@ -140,15 +207,20 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
140
207
  // coarse quantizers
141
208
  } else if (!coarse_quantizer &&
142
209
  sscanf (tok, "IVF%" PRId64 "_HNSW%d", &ncentroids, &M) == 2) {
143
- coarse_quantizer_1 = new IndexHNSWFlat (d, M);
210
+ coarse_quantizer_1 = new IndexHNSWFlat (d, M, metric);
144
211
 
145
212
  } else if (!coarse_quantizer &&
146
213
  sscanf (tok, "IVF%" PRId64, &ncentroids) == 1) {
147
- if (metric == METRIC_L2) {
214
+ if (!parenthesis_ivf.empty()) {
215
+ coarse_quantizer_1 =
216
+ index_factory(d, parenthesis_ivf.c_str(), metric);
217
+
218
+ } else if (metric == METRIC_L2) {
148
219
  coarse_quantizer_1 = new IndexFlatL2 (d);
149
220
  } else {
150
221
  coarse_quantizer_1 = new IndexFlatIP (d);
151
222
  }
223
+
152
224
  } else if (!coarse_quantizer && sscanf (tok, "IMI2x%d", &nbit) == 1) {
153
225
  FAISS_THROW_IF_NOT_MSG (metric == METRIC_L2,
154
226
  "MultiIndex not implemented for inner prod search");
@@ -228,6 +300,32 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
228
300
  del_coarse_quantizer.release ();
229
301
  index_ivf->own_fields = true;
230
302
  index_1 = index_ivf;
303
+ } else if (!index && (
304
+ sscanf (tok, "PQ%dx4fs_%d", &M, &bbs) == 2 ||
305
+ (sscanf (tok, "PQ%dx4f%c", &M, &c) == 2 && c == 's') ||
306
+ (sscanf (tok, "PQ%dx4fs%c", &M, &c) == 2 && c == 'r'))) {
307
+ if (bbs == -1) {
308
+ bbs = 32;
309
+ }
310
+ bool by_residual = str_ends_with(stok, "fsr");
311
+ if (coarse_quantizer) {
312
+ IndexIVFPQFastScan *index_ivf = new IndexIVFPQFastScan(
313
+ coarse_quantizer, d, ncentroids, M, 4, metric, bbs
314
+ );
315
+ index_ivf->quantizer_trains_alone =
316
+ get_trains_alone (coarse_quantizer);
317
+ index_ivf->metric_type = metric;
318
+ index_ivf->by_residual = by_residual;
319
+ index_ivf->cp.spherical = metric == METRIC_INNER_PRODUCT;
320
+ del_coarse_quantizer.release ();
321
+ index_ivf->own_fields = true;
322
+ index_1 = index_ivf;
323
+ } else {
324
+ IndexPQFastScan *index_pq = new IndexPQFastScan (
325
+ d, M, 4, metric, bbs
326
+ );
327
+ index_1 = index_pq;
328
+ }
231
329
  } else if (!index && (sscanf (tok, "PQ%dx%d", &M, &nbit) == 2 ||
232
330
  sscanf (tok, "PQ%d", &M) == 1 ||
233
331
  sscanf (tok, "PQ%dnp", &M) == 1)) {
@@ -299,7 +397,12 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
299
397
  FAISS_THROW_IF_NOT(!coarse_quantizer);
300
398
  index_1 = new IndexLattice(d, M, nbit, r2);
301
399
  } else if (stok == "RFlat") {
302
- make_IndexRefineFlat = true;
400
+ parenthesis_refine = "Flat";
401
+ } else if (stok == "Refine") {
402
+ FAISS_THROW_IF_NOT_MSG(
403
+ !parenthesis_refine.empty(),
404
+ "Refine index should be provided in parentheses"
405
+ );
303
406
  } else {
304
407
  FAISS_THROW_FMT( "could not parse token \"%s\" in %s\n",
305
408
  tok, description_in);
@@ -356,8 +459,10 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
356
459
  index = index_pt;
357
460
  }
358
461
 
359
- if (make_IndexRefineFlat) {
360
- IndexRefineFlat *index_rf = new IndexRefineFlat (index);
462
+ if (!parenthesis_refine.empty()) {
463
+ Index *refine_index = index_factory(d_in, parenthesis_refine.c_str(), metric);
464
+ IndexRefine *index_rf = new IndexRefine(index, refine_index);
465
+ index_rf->own_refine_index = true;
361
466
  index_rf->own_fields = true;
362
467
  index = index_rf;
363
468
  }
@@ -51,7 +51,7 @@ const int IO_FLAG_READ_ONLY = 2;
51
51
  const int IO_FLAG_ONDISK_SAME_DIR = 4;
52
52
  // don't load IVF data to RAM, only list sizes
53
53
  const int IO_FLAG_SKIP_IVF_DATA = 8;
54
- // try to memmap data (useful for OnDiskInvertedLists)
54
+ // try to memmap data (useful to load an ArrayInvertedLists as an OnDiskInvertedLists)
55
55
  const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
56
56
 
57
57
 
@@ -76,53 +76,6 @@ void write_InvertedLists (const InvertedLists *ils, IOWriter *f);
76
76
  InvertedLists *read_InvertedLists (IOReader *reader, int io_flags = 0);
77
77
 
78
78
 
79
- #ifndef _MSC_VER
80
- /** Callbacks to handle other types of InvertedList objects.
81
- *
82
- * The callbacks should be registered with add_callback before calling
83
- * read_index or read_InvertedLists. The callbacks for
84
- * OnDiskInvertedLists are registrered by default. The invlist type is
85
- * identified by:
86
- *
87
- * - the key (a fourcc) at read time
88
- * - the class name (as given by typeid.name) at write time
89
- */
90
- struct InvertedListsIOHook {
91
- const std::string key; ///< string version of the fourcc
92
- const std::string classname; ///< typeid.name
93
-
94
- InvertedListsIOHook(const std::string & key, const std::string & classname);
95
-
96
- /// write the index to the IOWriter (including the fourcc)
97
- virtual void write(const InvertedLists *ils, IOWriter *f) const = 0;
98
-
99
- /// called when the fourcc matches this class's fourcc
100
- virtual InvertedLists * read(IOReader *f, int io_flags) const = 0;
101
-
102
- * read from an ArrayInvertedLists into this invertedlist type.
103
- * For this to work, the callback has to be enabled and the io_flag has to be set to
104
- * IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
105
- */
106
- virtual InvertedLists * read_ArrayInvertedLists(
107
- IOReader *f, int io_flags,
108
- size_t nlist, size_t code_size,
109
- const std::vector<size_t> &sizes) const = 0;
110
-
111
- virtual ~InvertedListsIOHook() {}
112
-
113
- /**************************** Manage the set of callbacks ******/
114
-
115
- // transfers ownership
116
- static void add_callback(InvertedListsIOHook *);
117
- static void print_callbacks();
118
- static InvertedListsIOHook* lookup(int h);
119
- static InvertedListsIOHook* lookup_classname(const std::string & classname);
120
-
121
- };
122
-
123
- #endif // !_MSC_VER
124
-
125
-
126
79
  } // namespace faiss
127
80
 
128
81
 
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #include <faiss/invlists/BlockInvertedLists.h>
9
+
10
+ #include <faiss/impl/FaissAssert.h>
11
+
12
+ #include <faiss/impl/io.h>
13
+ #include <faiss/impl/io_macros.h>
14
+
15
+
16
+ namespace faiss {
17
+
18
+ BlockInvertedLists::BlockInvertedLists (
19
+ size_t nlist, size_t n_per_block,
20
+ size_t block_size):
21
+ InvertedLists (nlist, InvertedLists::INVALID_CODE_SIZE),
22
+ n_per_block(n_per_block), block_size(block_size)
23
+ {
24
+ ids.resize (nlist);
25
+ codes.resize (nlist);
26
+ }
27
+
28
+ BlockInvertedLists::BlockInvertedLists ():
29
+ InvertedLists (0, InvertedLists::INVALID_CODE_SIZE),
30
+ n_per_block(0), block_size(0)
31
+ {}
32
+
33
+
34
+ size_t BlockInvertedLists::add_entries (
35
+ size_t list_no, size_t n_entry,
36
+ const idx_t* ids_in, const uint8_t *code)
37
+ {
38
+ if (n_entry == 0) return 0;
39
+ FAISS_THROW_IF_NOT (list_no < nlist);
40
+ size_t o = ids [list_no].size();
41
+ FAISS_THROW_IF_NOT (o == 0); // not clear how we should handle subsequent adds
42
+ ids [list_no].resize (o + n_entry);
43
+ memcpy (&ids[list_no][o], ids_in, sizeof (ids_in[0]) * n_entry);
44
+
45
+ // copy whole blocks
46
+ size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
47
+ codes [list_no].resize (n_block * block_size);
48
+ memcpy (&codes[list_no][o * code_size], code, n_block * block_size);
49
+ return o;
50
+ }
51
+
52
+ size_t BlockInvertedLists::list_size(size_t list_no) const
53
+ {
54
+ assert (list_no < nlist);
55
+ return ids[list_no].size();
56
+ }
57
+
58
+ const uint8_t * BlockInvertedLists::get_codes (size_t list_no) const
59
+ {
60
+ assert (list_no < nlist);
61
+ return codes[list_no].get();
62
+ }
63
+
64
+ const InvertedLists::idx_t * BlockInvertedLists::get_ids (size_t list_no) const
65
+ {
66
+ assert (list_no < nlist);
67
+ return ids[list_no].data();
68
+ }
69
+
70
+ void BlockInvertedLists::resize (size_t list_no, size_t new_size)
71
+ {
72
+ ids[list_no].resize (new_size);
73
+ size_t prev_nbytes = codes[list_no].size();
74
+ size_t n_block = (new_size + n_per_block - 1) / n_per_block;
75
+ size_t new_nbytes = n_block * block_size;
76
+ codes[list_no].resize (new_nbytes);
77
+ if (prev_nbytes < new_nbytes) {
78
+ // set new elements to 0
79
+ memset(
80
+ codes[list_no].data() + prev_nbytes, 0,
81
+ new_nbytes - prev_nbytes
82
+ );
83
+ }
84
+ }
85
+
86
+ void BlockInvertedLists::update_entries (
87
+ size_t , size_t , size_t ,
88
+ const idx_t *, const uint8_t *)
89
+ {
90
+ FAISS_THROW_MSG("not impemented");
91
+ /*
92
+ assert (list_no < nlist);
93
+ assert (n_entry + offset <= ids[list_no].size());
94
+ memcpy (&ids[list_no][offset], ids_in, sizeof(ids_in[0]) * n_entry);
95
+ memcpy (&codes[list_no][offset * code_size], codes_in, code_size * n_entry);
96
+ */
97
+ }
98
+
99
+
100
+ BlockInvertedLists::~BlockInvertedLists ()
101
+ {}
102
+
103
+ /**************************************************
104
+ * IO hook implementation
105
+ **************************************************/
106
+
107
+ BlockInvertedListsIOHook::BlockInvertedListsIOHook():
108
+ InvertedListsIOHook("ilbl", typeid(BlockInvertedLists).name())
109
+ {}
110
+
111
+
112
+ void BlockInvertedListsIOHook::write(const InvertedLists *ils_in, IOWriter *f) const
113
+ {
114
+ uint32_t h = fourcc ("ilbl");
115
+ WRITE1 (h);
116
+ const BlockInvertedLists *il =
117
+ dynamic_cast<const BlockInvertedLists*> (ils_in);
118
+ WRITE1 (il->nlist);
119
+ WRITE1 (il->code_size);
120
+ WRITE1 (il->n_per_block);
121
+ WRITE1 (il->block_size);
122
+
123
+ for (size_t i = 0; i < il->nlist; i++) {
124
+ WRITEVECTOR(il->ids[i]);
125
+ WRITEVECTOR(il->codes[i]);
126
+ }
127
+ }
128
+
129
+ InvertedLists * BlockInvertedListsIOHook::read(IOReader *f, int /* io_flags */) const
130
+ {
131
+ BlockInvertedLists *il = new BlockInvertedLists();
132
+ READ1 (il->nlist);
133
+ READ1 (il->code_size);
134
+ READ1 (il->n_per_block);
135
+ READ1 (il->block_size);
136
+
137
+ il->ids.resize(il->nlist);
138
+ il->codes.resize(il->nlist);
139
+
140
+ for (size_t i = 0; i < il->nlist; i++) {
141
+ READVECTOR(il->ids[i]);
142
+ READVECTOR(il->codes[i]);
143
+ }
144
+
145
+ return il;
146
+ }
147
+
148
+
149
+
150
+
151
+ } // namespace faiss
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+
9
+ #pragma once
10
+
11
+ #include <faiss/invlists/InvertedLists.h>
12
+ #include <faiss/invlists/InvertedListsIOHook.h>
13
+ #include <faiss/utils/AlignedTable.h>
14
+ #include <faiss/index_io.h>
15
+
16
+ namespace faiss {
17
+
18
+ /** Inverted Lists that are organized by blocks.
19
+ *
20
+ * Different from the regular inverted lists, the codes are organized by blocks
21
+ * of size block_size bytes that represent a set of n_per_block vectors. Therefore, code
22
+ * allocations are always rounded up to block_size bytes. The codes are also
23
+ * aligned on 32-byte boundaries for use with SIMD.
24
+ *
25
+ * To avoid misinterpretations, the code_size is set to (size_t)(-1), even if
26
+ * arguably the amount of memory consumed by code is block_size / n_per_block.
27
+ *
28
+ * The writing functions add_entries and update_entries operate on block-aligned
29
+ * data.
30
+ */
31
+ struct BlockInvertedLists: InvertedLists {
32
+
33
+ size_t n_per_block; // nb of vectors stored per block
34
+ size_t block_size; // nb bytes per block
35
+
36
+ std::vector<AlignedTable<uint8_t>> codes;
37
+ std::vector<std::vector<idx_t>> ids;
38
+
39
+
40
+ BlockInvertedLists (
41
+ size_t nlist, size_t vec_per_block,
42
+ size_t block_size
43
+ );
44
+
45
+ BlockInvertedLists();
46
+
47
+ size_t list_size(size_t list_no) const override;
48
+ const uint8_t * get_codes (size_t list_no) const override;
49
+ const idx_t * get_ids (size_t list_no) const override;
50
+
51
+ // works only on empty BlockInvertedLists
52
+ // the codes should be of size ceil(n_entry / n_per_block) * block_size
53
+ // and padded with 0s
54
+ size_t add_entries (
55
+ size_t list_no, size_t n_entry,
56
+ const idx_t* ids, const uint8_t *code) override;
57
+
58
+ /// not implemented
59
+ void update_entries (size_t list_no, size_t offset, size_t n_entry,
60
+ const idx_t *ids, const uint8_t *code) override;
61
+
62
+ // also pads new data with 0s
63
+ void resize (size_t list_no, size_t new_size) override;
64
+
65
+ ~BlockInvertedLists () override;
66
+
67
+ };
68
+
69
+ struct BlockInvertedListsIOHook : InvertedListsIOHook {
70
+ BlockInvertedListsIOHook();
71
+ void write(const InvertedLists *ils, IOWriter *f) const override;
72
+ InvertedLists * read(IOReader *f, int io_flags) const override;
73
+ };
74
+
75
+
76
+ } // namespace faiss