faiss 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (189) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -13,6 +13,8 @@
13
13
  #include <faiss/Clustering.h>
14
14
  #include <faiss/impl/AdditiveQuantizer.h>
15
15
 
16
+ #include <faiss/utils/approx_topk/mode.h>
17
+
16
18
  namespace faiss {
17
19
 
18
20
  /** Residual quantizer with variable number of bits per sub-quantizer
@@ -29,7 +31,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
29
31
  using train_type_t = int;
30
32
 
31
33
  /// Binary or of the Train_* flags below
32
- train_type_t train_type;
34
+ train_type_t train_type = Train_progressive_dim;
33
35
 
34
36
  /// regular k-means (minimal amount of computation)
35
37
  static const int Train_default = 0;
@@ -41,7 +43,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
41
43
  static const int Train_refine_codebook = 2;
42
44
 
43
45
  /// number of iterations for codebook refinement.
44
- int niter_codebook_refine;
46
+ int niter_codebook_refine = 5;
45
47
 
46
48
  /** set this bit on train_type if beam is to be trained only on the
47
49
  * first element of the beam (faster but less accurate) */
@@ -52,16 +54,20 @@ struct ResidualQuantizer : AdditiveQuantizer {
52
54
  static const int Skip_codebook_tables = 2048;
53
55
 
54
56
  /// beam size used for training and for encoding
55
- int max_beam_size;
57
+ int max_beam_size = 5;
56
58
 
57
59
  /// use LUT for beam search
58
- int use_beam_LUT;
60
+ int use_beam_LUT = 0;
61
+
62
+ /// Currently used mode of approximate min-k computations.
63
+ /// Default value is EXACT_TOPK.
64
+ ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK;
59
65
 
60
66
  /// clustering parameters
61
67
  ProgressiveDimClusteringParameters cp;
62
68
 
63
69
  /// if non-NULL, use this index for assignment
64
- ProgressiveDimIndexFactory* assign_index_factory;
70
+ ProgressiveDimIndexFactory* assign_index_factory = nullptr;
65
71
 
66
72
  ResidualQuantizer(
67
73
  size_t d,
@@ -183,7 +189,8 @@ void beam_search_encode_step(
183
189
  int32_t* new_codes,
184
190
  float* new_residuals,
185
191
  float* new_distances,
186
- Index* assign_index = nullptr);
192
+ Index* assign_index = nullptr,
193
+ ApproxTopK_mode_t approx_topk = ApproxTopK_mode_t::EXACT_TOPK);
187
194
 
188
195
  /** Encode a set of vectors using their dot products with the codebooks
189
196
  *
@@ -202,7 +209,8 @@ void beam_search_encode_step_tab(
202
209
  const int32_t* codes, // n * beam_size * m
203
210
  const float* distances, // n * beam_size
204
211
  size_t new_beam_size,
205
- int32_t* new_codes, // n * new_beam_size * (m + 1)
206
- float* new_distances); // n * new_beam_size
212
+ int32_t* new_codes, // n * new_beam_size * (m + 1)
213
+ float* new_distances, // n * new_beam_size
214
+ ApproxTopK_mode_t approx_topk = ApproxTopK_mode_t::EXACT_TOPK);
207
215
 
208
216
  }; // namespace faiss
@@ -54,7 +54,6 @@ namespace faiss {
54
54
 
55
55
  namespace {
56
56
 
57
- typedef Index::idx_t idx_t;
58
57
  typedef ScalarQuantizer::QuantizerType QuantizerType;
59
58
  typedef ScalarQuantizer::RangeStat RangeStat;
60
59
  using SQDistanceComputer = ScalarQuantizer::SQDistanceComputer;
@@ -1048,12 +1047,11 @@ SQDistanceComputer* select_distance_computer(
1048
1047
  ********************************************************************/
1049
1048
 
1050
1049
  ScalarQuantizer::ScalarQuantizer(size_t d, QuantizerType qtype)
1051
- : Quantizer(d), qtype(qtype), rangestat(RS_minmax), rangestat_arg(0) {
1050
+ : Quantizer(d), qtype(qtype) {
1052
1051
  set_derived_sizes();
1053
1052
  }
1054
1053
 
1055
- ScalarQuantizer::ScalarQuantizer()
1056
- : qtype(QT_8bit), rangestat(RS_minmax), rangestat_arg(0), bits(0) {}
1054
+ ScalarQuantizer::ScalarQuantizer() {}
1057
1055
 
1058
1056
  void ScalarQuantizer::set_derived_sizes() {
1059
1057
  switch (qtype) {
@@ -1131,7 +1129,7 @@ void ScalarQuantizer::train_residual(
1131
1129
  ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
1132
1130
 
1133
1131
  if (by_residual) {
1134
- std::vector<Index::idx_t> idx(n);
1132
+ std::vector<idx_t> idx(n);
1135
1133
  quantizer->assign(n, x, idx.data());
1136
1134
 
1137
1135
  std::vector<float> residuals(n * d);
@@ -34,7 +34,7 @@ struct ScalarQuantizer : Quantizer {
34
34
  QT_6bit, ///< 6 bits per component
35
35
  };
36
36
 
37
- QuantizerType qtype;
37
+ QuantizerType qtype = QT_8bit;
38
38
 
39
39
  /** The uniform encoder can estimate the range of representable
40
40
  * values of the uniform encoder using different statistics. Here
@@ -48,11 +48,11 @@ struct ScalarQuantizer : Quantizer {
48
48
  RS_optim, ///< alternate optimization of reconstruction error
49
49
  };
50
50
 
51
- RangeStat rangestat;
52
- float rangestat_arg;
51
+ RangeStat rangestat = RS_minmax;
52
+ float rangestat_arg = 0;
53
53
 
54
54
  /// bits per scalar code
55
- size_t bits;
55
+ size_t bits = 0;
56
56
 
57
57
  /// trained values (including the range)
58
58
  std::vector<float> trained;
@@ -18,7 +18,7 @@ ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
18
18
 
19
19
  template <typename IndexT>
20
20
  ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
21
- : IndexT(d), own_fields(false), isThreaded_(threaded) {}
21
+ : IndexT(d), isThreaded_(threaded) {}
22
22
 
23
23
  template <typename IndexT>
24
24
  ThreadedIndex<IndexT>::~ThreadedIndex() {
@@ -35,7 +35,7 @@ ThreadedIndex<IndexT>::~ThreadedIndex() {
35
35
  FAISS_ASSERT(!(bool)p.second);
36
36
  }
37
37
 
38
- if (own_fields) {
38
+ if (own_indices) {
39
39
  delete p.first;
40
40
  }
41
41
  }
@@ -102,7 +102,7 @@ void ThreadedIndex<IndexT>::removeIndex(IndexT* index) {
102
102
  indices_.erase(it);
103
103
  onAfterRemoveIndex(index);
104
104
 
105
- if (own_fields) {
105
+ if (own_indices) {
106
106
  delete index;
107
107
  }
108
108
 
@@ -29,7 +29,7 @@ class ThreadedIndex : public IndexT {
29
29
  /// WARNING: once an index is added, it becomes unsafe to touch it from any
30
30
  /// other thread than that on which is managing it, until we are shut
31
31
  /// down. Use runOnIndex to perform work on it instead.
32
- void addIndex(IndexT* index);
32
+ virtual void addIndex(IndexT* index);
33
33
 
34
34
  /// Remove an index that is managed by ourselves.
35
35
  /// This will flush all pending work on that index, and then shut
@@ -52,17 +52,17 @@ class ThreadedIndex : public IndexT {
52
52
  }
53
53
 
54
54
  /// Returns the i-th sub-index
55
- IndexT* at(int i) {
55
+ IndexT* at(size_t i) {
56
56
  return indices_[i].first;
57
57
  }
58
58
 
59
59
  /// Returns the i-th sub-index (const version)
60
- const IndexT* at(int i) const {
60
+ const IndexT* at(size_t i) const {
61
61
  return indices_[i].first;
62
62
  }
63
63
 
64
64
  /// Whether or not we are responsible for deleting our contained indices
65
- bool own_fields;
65
+ bool own_indices = false;
66
66
 
67
67
  protected:
68
68
  /// Called just after an index is added
@@ -0,0 +1,291 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #ifdef __AVX2__
11
+
12
+ #include <immintrin.h>
13
+
14
+ #include <type_traits>
15
+
16
+ #include <faiss/impl/code_distance/code_distance-generic.h>
17
+
18
+ namespace {
19
+
20
+ // Computes a horizontal sum over an __m256 register, i.e. returns the sum of its 8 float lanes
21
+ inline float horizontal_sum(const __m256 reg) {
22
+ const __m256 h0 = _mm256_hadd_ps(reg, reg);
23
+ const __m256 h1 = _mm256_hadd_ps(h0, h0);
24
+
25
+ // extract high and low __m128 regs from __m256; hadd works per 128-bit half, so each half now carries the sum of its own 4 lanes
26
+ const __m128 h2 = _mm256_extractf128_ps(h1, 1);
27
+ const __m128 h3 = _mm256_castps256_ps128(h1);
28
+
29
+ // add the two half sums; _mm_add_ss only adds the lowest lane, so the full total is valid in element 0 only
30
+ const __m128 h4 = _mm_add_ss(h2, h3);
31
+
32
+ // extract f[0] from __m128 (the lane that holds the total)
33
+ const float hsum = _mm_cvtss_f32(h4);
34
+ return hsum;
35
+ }
36
+
37
+ } // namespace
38
+
39
+ namespace faiss {
40
+
41
+ template <typename PQDecoderT> // overload enabled for every decoder type except PQDecoder8
42
+ typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, float>::
43
+ type inline distance_single_code_avx2(
44
+ // the product quantizer
45
+ const ProductQuantizer& pq,
46
+ // precomputed distances, layout (M, ksub)
47
+ const float* sim_table,
48
+ const uint8_t* code) {
49
+ // default implementation: no AVX2 kernel for this decoder, forward unchanged to the generic version
50
+ return distance_single_code_generic<PQDecoderT>(pq, sim_table, code);
51
+ }
52
+
53
+ template <typename PQDecoderT> // overload enabled only when PQDecoderT is PQDecoder8; returns the sum of the sim_table entries selected by the code
54
+ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
55
+ type inline distance_single_code_avx2(
56
+ // the product quantizer
57
+ const ProductQuantizer& pq,
58
+ // precomputed distances, layout (M, ksub)
59
+ const float* sim_table,
60
+ const uint8_t* code) {
61
+ float result = 0;
62
+
63
+ size_t m = 0;
64
+ const size_t pqM16 = pq.M / 16;
65
+
66
+ const float* tab = sim_table;
67
+
68
+ if (pqM16 > 0) {
69
+ // process 16 sub-quantizer indices (16 code bytes) per loop iteration, as two gathers of 8
70
+
71
+ const __m256i ksub = _mm256_set1_epi32(pq.ksub);
72
+ __m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
73
+ offsets_0 = _mm256_mullo_epi32(offsets_0, ksub); // lane i = i * ksub: offset of the i-th of 8 consecutive sub-tables, relative to tab
74
+
75
+ // accumulator of partial sums (8 float lanes, reduced once after the loop)
76
+ __m256 partialSum = _mm256_setzero_ps();
77
+
78
+ // loop over the full groups of 16 sub-quantizers
79
+ for (m = 0; m < pqM16 * 16; m += 16) {
80
+ // load 16 uint8 values (unaligned load)
81
+ const __m128i mm1 = _mm_loadu_si128((const __m128i_u*)(code + m));
82
+ {
83
+ // convert uint8 values (low part of __m128i) to int32
84
+ // values
85
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
86
+
87
+ // add offsets so that each lane indexes into its own sub-table
88
+ const __m256i indices_to_read_from =
89
+ _mm256_add_epi32(idx1, offsets_0);
90
+
91
+ // gather 8 values, similar to 8 operations of tab[idx]; the scale is sizeof(float) because the indices count floats
92
+ __m256 collected = _mm256_i32gather_ps(
93
+ tab, indices_to_read_from, sizeof(float));
94
+ tab += pq.ksub * 8; // advance past the 8 sub-tables just read
95
+
96
+ // collect partial sums
97
+ partialSum = _mm256_add_ps(partialSum, collected);
98
+ }
99
+
100
+ // move high 8 uint8 to low ones (the upper half is filled from the zero register)
101
+ const __m128i mm2 = _mm_unpackhi_epi64(mm1, _mm_setzero_si128());
102
+ {
103
+ // convert uint8 values (low part of __m128i) to int32
104
+ // values
105
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
106
+
107
+ // add offsets so that each lane indexes into its own sub-table
108
+ const __m256i indices_to_read_from =
109
+ _mm256_add_epi32(idx1, offsets_0);
110
+
111
+ // gather 8 values, similar to 8 operations of tab[idx]
112
+ __m256 collected = _mm256_i32gather_ps(
113
+ tab, indices_to_read_from, sizeof(float));
114
+ tab += pq.ksub * 8; // advance past the 8 sub-tables just read
115
+
116
+ // collect partial sums
117
+ partialSum = _mm256_add_ps(partialSum, collected);
118
+ }
119
+ }
120
+
121
+ // horizontal sum for partialSum
122
+ result += horizontal_sum(partialSum);
123
+ }
124
+
125
+ // scalar tail: the sub-quantizers not covered by the 16-wide loop (all of them when pq.M < 16)
126
+ if (m < pq.M) {
127
+ // process leftovers; the decoder starts at code + m, where the vector loop stopped
128
+ PQDecoder8 decoder(code + m, pq.nbits);
129
+
130
+ for (; m < pq.M; m++) {
131
+ result += tab[decoder.decode()];
132
+ tab += pq.ksub;
133
+ }
134
+ }
135
+
136
+ return result;
137
+ }
138
+
139
+ template <typename PQDecoderT> // overload enabled for every decoder type except PQDecoder8
140
+ typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, void>::
141
+ type
142
+ distance_four_codes_avx2(
143
+ // the product quantizer
144
+ const ProductQuantizer& pq,
145
+ // precomputed distances, layout (M, ksub)
146
+ const float* sim_table,
147
+ // codes (four codes evaluated against the same sim_table)
148
+ const uint8_t* __restrict code0,
149
+ const uint8_t* __restrict code1,
150
+ const uint8_t* __restrict code2,
151
+ const uint8_t* __restrict code3,
152
+ // computed distances (outputs, one per code)
153
+ float& result0,
154
+ float& result1,
155
+ float& result2,
156
+ float& result3) {
157
+ distance_four_codes_generic<PQDecoderT>( // no AVX2 kernel for this decoder: forward unchanged to the generic version
158
+ pq,
159
+ sim_table,
160
+ code0,
161
+ code1,
162
+ code2,
163
+ code3,
164
+ result0,
165
+ result1,
166
+ result2,
167
+ result3);
168
+ }
169
+
170
+ // Combines 4 operations of distance_single_code(): AVX2 overload, enabled only when PQDecoderT is PQDecoder8
171
+ template <typename PQDecoderT>
172
+ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, void>::type
173
+ distance_four_codes_avx2(
174
+ // the product quantizer
175
+ const ProductQuantizer& pq,
176
+ // precomputed distances, layout (M, ksub)
177
+ const float* sim_table,
178
+ // codes (four codes evaluated against the same sim_table)
179
+ const uint8_t* __restrict code0,
180
+ const uint8_t* __restrict code1,
181
+ const uint8_t* __restrict code2,
182
+ const uint8_t* __restrict code3,
183
+ // computed distances (outputs, overwritten; one per code)
184
+ float& result0,
185
+ float& result1,
186
+ float& result2,
187
+ float& result3) {
188
+ result0 = 0;
189
+ result1 = 0;
190
+ result2 = 0;
191
+ result3 = 0;
192
+
193
+ size_t m = 0;
194
+ const size_t pqM16 = pq.M / 16;
195
+
196
+ constexpr intptr_t N = 4;
197
+
198
+ const float* tab = sim_table;
199
+
200
+ if (pqM16 > 0) {
201
+ // process 16 sub-quantizer indices of each code per loop iteration
202
+ const __m256i ksub = _mm256_set1_epi32(pq.ksub);
203
+ __m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
204
+ offsets_0 = _mm256_mullo_epi32(offsets_0, ksub); // lane i = i * ksub: offset of the i-th of 8 consecutive sub-tables, relative to tab
205
+
206
+ // accumulators of partial sums, one register per code
207
+ __m256 partialSums[N];
208
+ for (intptr_t j = 0; j < N; j++) {
209
+ partialSums[j] = _mm256_setzero_ps();
210
+ }
211
+
212
+ // loop over the full groups of 16 sub-quantizers
213
+ for (m = 0; m < pqM16 * 16; m += 16) {
214
+ // load 16 uint8 values from each of the four codes
215
+ __m128i mm1[N];
216
+ mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
217
+ mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
218
+ mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
219
+ mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
220
+
221
+ // process the first 8 sub-quantizer indices of each of the N codes
222
+ for (intptr_t j = 0; j < N; j++) {
223
+ // convert uint8 values (low part of __m128i) to int32
224
+ // values
225
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm1[j]);
226
+
227
+ // add offsets so that each lane indexes into its own sub-table
228
+ const __m256i indices_to_read_from =
229
+ _mm256_add_epi32(idx1, offsets_0);
230
+
231
+ // gather 8 values, similar to 8 operations of tab[idx]
232
+ __m256 collected = _mm256_i32gather_ps(
233
+ tab, indices_to_read_from, sizeof(float));
234
+
235
+ // collect partial sums
236
+ partialSums[j] = _mm256_add_ps(partialSums[j], collected);
237
+ }
238
+ tab += pq.ksub * 8; // all N codes share the same 8 sub-tables, so tab advances once, after the inner loop
239
+
240
+ // process the next 8 sub-quantizer indices of each of the N codes
241
+ for (intptr_t j = 0; j < N; j++) {
242
+ // move high 8 uint8 to low ones
243
+ const __m128i mm2 =
244
+ _mm_unpackhi_epi64(mm1[j], _mm_setzero_si128());
245
+
246
+ // convert uint8 values (low part of __m128i) to int32
247
+ // values
248
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
249
+
250
+ // add offsets so that each lane indexes into its own sub-table
251
+ const __m256i indices_to_read_from =
252
+ _mm256_add_epi32(idx1, offsets_0);
253
+
254
+ // gather 8 values, similar to 8 operations of tab[idx]
255
+ __m256 collected = _mm256_i32gather_ps(
256
+ tab, indices_to_read_from, sizeof(float));
257
+
258
+ // collect partial sums
259
+ partialSums[j] = _mm256_add_ps(partialSums[j], collected);
260
+ }
261
+
262
+ tab += pq.ksub * 8;
263
+ }
264
+
265
+ // horizontal sum for each of the partialSums accumulators
266
+ result0 += horizontal_sum(partialSums[0]);
267
+ result1 += horizontal_sum(partialSums[1]);
268
+ result2 += horizontal_sum(partialSums[2]);
269
+ result3 += horizontal_sum(partialSums[3]);
270
+ }
271
+
272
+ // scalar tail: the sub-quantizers not covered by the 16-wide loop (all of them when pq.M < 16)
273
+ if (m < pq.M) {
274
+ // process leftovers; each decoder starts at its code + m, where the vector loop stopped
275
+ PQDecoder8 decoder0(code0 + m, pq.nbits);
276
+ PQDecoder8 decoder1(code1 + m, pq.nbits);
277
+ PQDecoder8 decoder2(code2 + m, pq.nbits);
278
+ PQDecoder8 decoder3(code3 + m, pq.nbits);
279
+ for (; m < pq.M; m++) {
280
+ result0 += tab[decoder0.decode()];
281
+ result1 += tab[decoder1.decode()];
282
+ result2 += tab[decoder2.decode()];
283
+ result3 += tab[decoder3.decode()];
284
+ tab += pq.ksub;
285
+ }
286
+ }
287
+ }
288
+
289
+ } // namespace faiss
290
+
291
+ #endif
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/ProductQuantizer.h>
11
+
12
+ namespace faiss {
13
+
14
+ /// Returns the distance to a single code: the sum, over the pq.M sub-quantizers, of the sim_table entry selected by each decoded index.
15
+ template <typename PQDecoderT>
16
+ inline float distance_single_code_generic(
17
+ // the product quantizer
18
+ const ProductQuantizer& pq,
19
+ // precomputed distances, layout (M, ksub)
20
+ const float* sim_table,
21
+ // the code
22
+ const uint8_t* code) {
23
+ PQDecoderT decoder(code, pq.nbits);
24
+
25
+ const float* tab = sim_table; // points at the sub-table of the current sub-quantizer
26
+ float result = 0;
27
+
28
+ for (size_t m = 0; m < pq.M; m++) {
29
+ result += tab[decoder.decode()];
30
+ tab += pq.ksub; // step to the next sub-quantizer's row of ksub entries
31
+ }
32
+
33
+ return result;
34
+ }
35
+
36
+ /// Combines 4 operations of distance_single_code(): four codes are scored against the same sim_table in a single pass.
37
+ /// General-purpose version.
38
+ template <typename PQDecoderT>
39
+ inline void distance_four_codes_generic(
40
+ // the product quantizer
41
+ const ProductQuantizer& pq,
42
+ // precomputed distances, layout (M, ksub)
43
+ const float* sim_table,
44
+ // codes
45
+ const uint8_t* __restrict code0,
46
+ const uint8_t* __restrict code1,
47
+ const uint8_t* __restrict code2,
48
+ const uint8_t* __restrict code3,
49
+ // computed distances (outputs, overwritten; one per code)
50
+ float& result0,
51
+ float& result1,
52
+ float& result2,
53
+ float& result3) {
54
+ PQDecoderT decoder0(code0, pq.nbits);
55
+ PQDecoderT decoder1(code1, pq.nbits);
56
+ PQDecoderT decoder2(code2, pq.nbits);
57
+ PQDecoderT decoder3(code3, pq.nbits);
58
+
59
+ const float* tab = sim_table; // points at the sub-table of the current sub-quantizer
60
+ result0 = 0;
61
+ result1 = 0;
62
+ result2 = 0;
63
+ result3 = 0;
64
+
65
+ for (size_t m = 0; m < pq.M; m++) {
66
+ result0 += tab[decoder0.decode()];
67
+ result1 += tab[decoder1.decode()];
68
+ result2 += tab[decoder2.decode()];
69
+ result3 += tab[decoder3.decode()];
70
+ tab += pq.ksub; // step to the next sub-quantizer's row of ksub entries
71
+ }
72
+ }
73
+
74
+ } // namespace faiss
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/platform_macros.h>
11
+
12
+ // This directory contains functions to compute a distance
13
+ // from a given PQ code to a query vector, given that the
14
+ // distances to a query vector for pq.M codebooks are precomputed.
15
+ //
16
+ // The code was originally part of IndexIVFPQ.cpp.
17
+ // The baseline implementation can be found in
18
+ // code_distance-generic.h, distance_single_code_generic().
19
+
20
+ // The reason for this somewhat unusual structure is that
21
+ // custom implementations may need to fall back to the generic
22
+ // implementation in certain cases. So, say, avx2 header file
23
+ // needs to reference the generic header file. This is
24
+ // why the names of the functions for custom implementations
25
+ // have this _generic or _avx2 suffix.
26
+
27
+ #ifdef __AVX2__
28
+
29
+ #include <faiss/impl/code_distance/code_distance-avx2.h>
30
+
31
+ namespace faiss {
32
+
33
+ template <typename PQDecoderT> // entry point when __AVX2__ is defined: forwards to the _avx2 implementation
34
+ inline float distance_single_code(
35
+ // the product quantizer
36
+ const ProductQuantizer& pq,
37
+ // precomputed distances, layout (M, ksub)
38
+ const float* sim_table,
39
+ // the code
40
+ const uint8_t* code) {
41
+ return distance_single_code_avx2<PQDecoderT>(pq, sim_table, code);
42
+ }
43
+
44
+ template <typename PQDecoderT> // entry point when __AVX2__ is defined: forwards to the _avx2 implementation
45
+ inline void distance_four_codes(
46
+ // the product quantizer
47
+ const ProductQuantizer& pq,
48
+ // precomputed distances, layout (M, ksub)
49
+ const float* sim_table,
50
+ // codes (four codes evaluated against the same sim_table)
51
+ const uint8_t* __restrict code0,
52
+ const uint8_t* __restrict code1,
53
+ const uint8_t* __restrict code2,
54
+ const uint8_t* __restrict code3,
55
+ // computed distances (outputs, one per code)
56
+ float& result0,
57
+ float& result1,
58
+ float& result2,
59
+ float& result3) {
60
+ distance_four_codes_avx2<PQDecoderT>(
61
+ pq,
62
+ sim_table,
63
+ code0,
64
+ code1,
65
+ code2,
66
+ code3,
67
+ result0,
68
+ result1,
69
+ result2,
70
+ result3);
71
+ }
72
+
73
+ } // namespace faiss
74
+
75
+ #else
76
+
77
+ #include <faiss/impl/code_distance/code_distance-generic.h>
78
+
79
+ namespace faiss {
80
+
81
+ template <typename PQDecoderT> // entry point when __AVX2__ is not defined: forwards to the generic implementation
82
+ inline float distance_single_code(
83
+ // the product quantizer
84
+ const ProductQuantizer& pq,
85
+ // precomputed distances, layout (M, ksub)
86
+ const float* sim_table,
87
+ // the code
88
+ const uint8_t* code) {
89
+ return distance_single_code_generic<PQDecoderT>(pq, sim_table, code);
90
+ }
91
+
92
+ template <typename PQDecoderT> // entry point when __AVX2__ is not defined: forwards to the generic implementation
93
+ inline void distance_four_codes(
94
+ // the product quantizer
95
+ const ProductQuantizer& pq,
96
+ // precomputed distances, layout (M, ksub)
97
+ const float* sim_table,
98
+ // codes (four codes evaluated against the same sim_table)
99
+ const uint8_t* __restrict code0,
100
+ const uint8_t* __restrict code1,
101
+ const uint8_t* __restrict code2,
102
+ const uint8_t* __restrict code3,
103
+ // computed distances (outputs, one per code)
104
+ float& result0,
105
+ float& result1,
106
+ float& result2,
107
+ float& result3) {
108
+ distance_four_codes_generic<PQDecoderT>(
109
+ pq,
110
+ sim_table,
111
+ code0,
112
+ code1,
113
+ code2,
114
+ code3,
115
+ result0,
116
+ result1,
117
+ result2,
118
+ result3);
119
+ }
120
+
121
+ } // namespace faiss
122
+
123
+ #endif