faiss 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/faiss/extconf.rb +1 -1
  4. data/lib/faiss/version.rb +1 -1
  5. data/lib/faiss.rb +2 -2
  6. data/vendor/faiss/faiss/AutoTune.cpp +15 -4
  7. data/vendor/faiss/faiss/AutoTune.h +0 -1
  8. data/vendor/faiss/faiss/Clustering.cpp +1 -5
  9. data/vendor/faiss/faiss/Clustering.h +0 -2
  10. data/vendor/faiss/faiss/IVFlib.h +0 -2
  11. data/vendor/faiss/faiss/Index.h +1 -2
  12. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
  13. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
  14. data/vendor/faiss/faiss/IndexBinary.h +0 -1
  15. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
  16. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
  17. data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
  18. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
  19. data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
  20. data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
  21. data/vendor/faiss/faiss/IndexFastScan.h +5 -1
  22. data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
  23. data/vendor/faiss/faiss/IndexFlat.h +1 -1
  24. data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
  25. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
  26. data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
  27. data/vendor/faiss/faiss/IndexHNSW.h +0 -1
  28. data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
  29. data/vendor/faiss/faiss/IndexIDMap.h +0 -2
  30. data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
  31. data/vendor/faiss/faiss/IndexIVF.h +121 -61
  32. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
  33. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
  34. data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
  35. data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
  36. data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
  37. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
  38. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
  39. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
  40. data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
  41. data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
  42. data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
  43. data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
  44. data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
  45. data/vendor/faiss/faiss/IndexReplicas.h +0 -1
  46. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
  47. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
  48. data/vendor/faiss/faiss/IndexShards.cpp +26 -109
  49. data/vendor/faiss/faiss/IndexShards.h +2 -3
  50. data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
  51. data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
  52. data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
  53. data/vendor/faiss/faiss/MetaIndexes.h +29 -0
  54. data/vendor/faiss/faiss/MetricType.h +14 -0
  55. data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
  56. data/vendor/faiss/faiss/VectorTransform.h +1 -3
  57. data/vendor/faiss/faiss/clone_index.cpp +232 -18
  58. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
  59. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
  60. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
  61. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
  62. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
  63. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
  64. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
  65. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
  66. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
  67. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
  68. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
  69. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
  70. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
  71. data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
  72. data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
  73. data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
  74. data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
  75. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
  76. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
  77. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
  78. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
  79. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
  80. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
  81. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
  82. data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
  83. data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
  84. data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
  85. data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
  86. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
  87. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
  88. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
  89. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
  90. data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
  91. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
  92. data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
  93. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
  94. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
  95. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
  96. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
  97. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
  98. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
  99. data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
  100. data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
  101. data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
  102. data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
  103. data/vendor/faiss/faiss/impl/HNSW.h +6 -9
  104. data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
  105. data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
  106. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
  107. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
  108. data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
  109. data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
  110. data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
  111. data/vendor/faiss/faiss/impl/NSG.h +4 -7
  112. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
  113. data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
  114. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
  115. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
  116. data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
  117. data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
  118. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
  119. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
  120. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
  121. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
  122. data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
  123. data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
  124. data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
  125. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
  126. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
  127. data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
  128. data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
  129. data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
  130. data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
  131. data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
  132. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
  133. data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
  134. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
  135. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
  136. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
  137. data/vendor/faiss/faiss/index_factory.cpp +8 -10
  138. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
  139. data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
  140. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
  141. data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
  142. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
  143. data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
  144. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
  145. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
  146. data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
  147. data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
  148. data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
  149. data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
  150. data/vendor/faiss/faiss/utils/Heap.h +35 -1
  151. data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
  152. data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
  153. data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
  154. data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
  155. data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
  156. data/vendor/faiss/faiss/utils/distances.cpp +61 -7
  157. data/vendor/faiss/faiss/utils/distances.h +11 -0
  158. data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
  159. data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
  160. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
  161. data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
  162. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
  163. data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
  164. data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
  165. data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
  166. data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
  167. data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
  168. data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
  169. data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
  170. data/vendor/faiss/faiss/utils/fp16.h +7 -0
  171. data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
  172. data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
  173. data/vendor/faiss/faiss/utils/hamming.h +21 -10
  174. data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
  175. data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
  176. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
  177. data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
  178. data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
  179. data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
  180. data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
  181. data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
  182. data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
  183. data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
  184. data/vendor/faiss/faiss/utils/sorting.h +71 -0
  185. data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
  186. data/vendor/faiss/faiss/utils/utils.cpp +4 -176
  187. data/vendor/faiss/faiss/utils/utils.h +2 -9
  188. metadata +29 -3
  189. data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
@@ -13,6 +13,8 @@
13
13
  #include <faiss/Clustering.h>
14
14
  #include <faiss/impl/AdditiveQuantizer.h>
15
15
 
16
+ #include <faiss/utils/approx_topk/mode.h>
17
+
16
18
  namespace faiss {
17
19
 
18
20
  /** Residual quantizer with variable number of bits per sub-quantizer
@@ -29,7 +31,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
29
31
  using train_type_t = int;
30
32
 
31
33
  /// Binary or of the Train_* flags below
32
- train_type_t train_type;
34
+ train_type_t train_type = Train_progressive_dim;
33
35
 
34
36
  /// regular k-means (minimal amount of computation)
35
37
  static const int Train_default = 0;
@@ -41,7 +43,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
41
43
  static const int Train_refine_codebook = 2;
42
44
 
43
45
  /// number of iterations for codebook refinement.
44
- int niter_codebook_refine;
46
+ int niter_codebook_refine = 5;
45
47
 
46
48
  /** set this bit on train_type if beam is to be trained only on the
47
49
  * first element of the beam (faster but less accurate) */
@@ -52,16 +54,20 @@ struct ResidualQuantizer : AdditiveQuantizer {
52
54
  static const int Skip_codebook_tables = 2048;
53
55
 
54
56
  /// beam size used for training and for encoding
55
- int max_beam_size;
57
+ int max_beam_size = 5;
56
58
 
57
59
  /// use LUT for beam search
58
- int use_beam_LUT;
60
+ int use_beam_LUT = 0;
61
+
62
+ /// Currently used mode of approximate min-k computations.
63
+ /// Default value is EXACT_TOPK.
64
+ ApproxTopK_mode_t approx_topk_mode = ApproxTopK_mode_t::EXACT_TOPK;
59
65
 
60
66
  /// clustering parameters
61
67
  ProgressiveDimClusteringParameters cp;
62
68
 
63
69
  /// if non-NULL, use this index for assignment
64
- ProgressiveDimIndexFactory* assign_index_factory;
70
+ ProgressiveDimIndexFactory* assign_index_factory = nullptr;
65
71
 
66
72
  ResidualQuantizer(
67
73
  size_t d,
@@ -183,7 +189,8 @@ void beam_search_encode_step(
183
189
  int32_t* new_codes,
184
190
  float* new_residuals,
185
191
  float* new_distances,
186
- Index* assign_index = nullptr);
192
+ Index* assign_index = nullptr,
193
+ ApproxTopK_mode_t approx_topk = ApproxTopK_mode_t::EXACT_TOPK);
187
194
 
188
195
  /** Encode a set of vectors using their dot products with the codebooks
189
196
  *
@@ -202,7 +209,8 @@ void beam_search_encode_step_tab(
202
209
  const int32_t* codes, // n * beam_size * m
203
210
  const float* distances, // n * beam_size
204
211
  size_t new_beam_size,
205
- int32_t* new_codes, // n * new_beam_size * (m + 1)
206
- float* new_distances); // n * new_beam_size
212
+ int32_t* new_codes, // n * new_beam_size * (m + 1)
213
+ float* new_distances, // n * new_beam_size
214
+ ApproxTopK_mode_t approx_topk = ApproxTopK_mode_t::EXACT_TOPK);
207
215
 
208
216
  }; // namespace faiss
@@ -54,7 +54,6 @@ namespace faiss {
54
54
 
55
55
  namespace {
56
56
 
57
- typedef Index::idx_t idx_t;
58
57
  typedef ScalarQuantizer::QuantizerType QuantizerType;
59
58
  typedef ScalarQuantizer::RangeStat RangeStat;
60
59
  using SQDistanceComputer = ScalarQuantizer::SQDistanceComputer;
@@ -1048,12 +1047,11 @@ SQDistanceComputer* select_distance_computer(
1048
1047
  ********************************************************************/
1049
1048
 
1050
1049
  ScalarQuantizer::ScalarQuantizer(size_t d, QuantizerType qtype)
1051
- : Quantizer(d), qtype(qtype), rangestat(RS_minmax), rangestat_arg(0) {
1050
+ : Quantizer(d), qtype(qtype) {
1052
1051
  set_derived_sizes();
1053
1052
  }
1054
1053
 
1055
- ScalarQuantizer::ScalarQuantizer()
1056
- : qtype(QT_8bit), rangestat(RS_minmax), rangestat_arg(0), bits(0) {}
1054
+ ScalarQuantizer::ScalarQuantizer() {}
1057
1055
 
1058
1056
  void ScalarQuantizer::set_derived_sizes() {
1059
1057
  switch (qtype) {
@@ -1131,7 +1129,7 @@ void ScalarQuantizer::train_residual(
1131
1129
  ScopeDeleter<float> del_x(x_in == x ? nullptr : x);
1132
1130
 
1133
1131
  if (by_residual) {
1134
- std::vector<Index::idx_t> idx(n);
1132
+ std::vector<idx_t> idx(n);
1135
1133
  quantizer->assign(n, x, idx.data());
1136
1134
 
1137
1135
  std::vector<float> residuals(n * d);
@@ -34,7 +34,7 @@ struct ScalarQuantizer : Quantizer {
34
34
  QT_6bit, ///< 6 bits per component
35
35
  };
36
36
 
37
- QuantizerType qtype;
37
+ QuantizerType qtype = QT_8bit;
38
38
 
39
39
  /** The uniform encoder can estimate the range of representable
40
40
  * values of the uniform encoder using different statistics. Here
@@ -48,11 +48,11 @@ struct ScalarQuantizer : Quantizer {
48
48
  RS_optim, ///< alternate optimization of reconstruction error
49
49
  };
50
50
 
51
- RangeStat rangestat;
52
- float rangestat_arg;
51
+ RangeStat rangestat = RS_minmax;
52
+ float rangestat_arg = 0;
53
53
 
54
54
  /// bits per scalar code
55
- size_t bits;
55
+ size_t bits = 0;
56
56
 
57
57
  /// trained values (including the range)
58
58
  std::vector<float> trained;
@@ -18,7 +18,7 @@ ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
18
18
 
19
19
  template <typename IndexT>
20
20
  ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
21
- : IndexT(d), own_fields(false), isThreaded_(threaded) {}
21
+ : IndexT(d), isThreaded_(threaded) {}
22
22
 
23
23
  template <typename IndexT>
24
24
  ThreadedIndex<IndexT>::~ThreadedIndex() {
@@ -35,7 +35,7 @@ ThreadedIndex<IndexT>::~ThreadedIndex() {
35
35
  FAISS_ASSERT(!(bool)p.second);
36
36
  }
37
37
 
38
- if (own_fields) {
38
+ if (own_indices) {
39
39
  delete p.first;
40
40
  }
41
41
  }
@@ -102,7 +102,7 @@ void ThreadedIndex<IndexT>::removeIndex(IndexT* index) {
102
102
  indices_.erase(it);
103
103
  onAfterRemoveIndex(index);
104
104
 
105
- if (own_fields) {
105
+ if (own_indices) {
106
106
  delete index;
107
107
  }
108
108
 
@@ -29,7 +29,7 @@ class ThreadedIndex : public IndexT {
29
29
  /// WARNING: once an index is added, it becomes unsafe to touch it from any
30
30
  /// other thread than the one that is managing it, until we are shut
31
31
  /// down. Use runOnIndex to perform work on it instead.
32
- void addIndex(IndexT* index);
32
+ virtual void addIndex(IndexT* index);
33
33
 
34
34
  /// Remove an index that is managed by ourselves.
35
35
  /// This will flush all pending work on that index, and then shut
@@ -52,17 +52,17 @@ class ThreadedIndex : public IndexT {
52
52
  }
53
53
 
54
54
  /// Returns the i-th sub-index
55
- IndexT* at(int i) {
55
+ IndexT* at(size_t i) {
56
56
  return indices_[i].first;
57
57
  }
58
58
 
59
59
  /// Returns the i-th sub-index (const version)
60
- const IndexT* at(int i) const {
60
+ const IndexT* at(size_t i) const {
61
61
  return indices_[i].first;
62
62
  }
63
63
 
64
64
  /// Whether or not we are responsible for deleting our contained indices
65
- bool own_fields;
65
+ bool own_indices = false;
66
66
 
67
67
  protected:
68
68
  /// Called just after an index is added
@@ -0,0 +1,291 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #ifdef __AVX2__
11
+
12
+ #include <immintrin.h>
13
+
14
+ #include <type_traits>
15
+
16
+ #include <faiss/impl/code_distance/code_distance-generic.h>
17
+
18
+ namespace {
19
+
20
+ // Computes a horizontal sum over an __m256 register
21
+ inline float horizontal_sum(const __m256 reg) {
22
+ const __m256 h0 = _mm256_hadd_ps(reg, reg);
23
+ const __m256 h1 = _mm256_hadd_ps(h0, h0);
24
+
25
+ // extract high and low __m128 regs from __m256
26
+ const __m128 h2 = _mm256_extractf128_ps(h1, 1);
27
+ const __m128 h3 = _mm256_castps256_ps128(h1);
28
+
29
+ // get a final hsum into all 4 regs
30
+ const __m128 h4 = _mm_add_ss(h2, h3);
31
+
32
+ // extract f[0] from __m128
33
+ const float hsum = _mm_cvtss_f32(h4);
34
+ return hsum;
35
+ }
36
+
37
+ } // namespace
38
+
39
+ namespace faiss {
40
+
41
+ template <typename PQDecoderT>
42
+ typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, float>::
43
+ type inline distance_single_code_avx2(
44
+ // the product quantizer
45
+ const ProductQuantizer& pq,
46
+ // precomputed distances, layout (M, ksub)
47
+ const float* sim_table,
48
+ const uint8_t* code) {
49
+ // default implementation
50
+ return distance_single_code_generic<PQDecoderT>(pq, sim_table, code);
51
+ }
52
+
53
+ template <typename PQDecoderT>
54
+ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
55
+ type inline distance_single_code_avx2(
56
+ // the product quantizer
57
+ const ProductQuantizer& pq,
58
+ // precomputed distances, layout (M, ksub)
59
+ const float* sim_table,
60
+ const uint8_t* code) {
61
+ float result = 0;
62
+
63
+ size_t m = 0;
64
+ const size_t pqM16 = pq.M / 16;
65
+
66
+ const float* tab = sim_table;
67
+
68
+ if (pqM16 > 0) {
69
+ // process 16 values per loop
70
+
71
+ const __m256i ksub = _mm256_set1_epi32(pq.ksub);
72
+ __m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
73
+ offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
74
+
75
+ // accumulators of partial sums
76
+ __m256 partialSum = _mm256_setzero_ps();
77
+
78
+ // loop
79
+ for (m = 0; m < pqM16 * 16; m += 16) {
80
+ // load 16 uint8 values
81
+ const __m128i mm1 = _mm_loadu_si128((const __m128i_u*)(code + m));
82
+ {
83
+ // convert uint8 values (low part of __m128i) to int32
84
+ // values
85
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
86
+
87
+ // add offsets
88
+ const __m256i indices_to_read_from =
89
+ _mm256_add_epi32(idx1, offsets_0);
90
+
91
+ // gather 8 values, similar to 8 operations of tab[idx]
92
+ __m256 collected = _mm256_i32gather_ps(
93
+ tab, indices_to_read_from, sizeof(float));
94
+ tab += pq.ksub * 8;
95
+
96
+ // collect partial sums
97
+ partialSum = _mm256_add_ps(partialSum, collected);
98
+ }
99
+
100
+ // move high 8 uint8 to low ones
101
+ const __m128i mm2 = _mm_unpackhi_epi64(mm1, _mm_setzero_si128());
102
+ {
103
+ // convert uint8 values (low part of __m128i) to int32
104
+ // values
105
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
106
+
107
+ // add offsets
108
+ const __m256i indices_to_read_from =
109
+ _mm256_add_epi32(idx1, offsets_0);
110
+
111
+ // gather 8 values, similar to 8 operations of tab[idx]
112
+ __m256 collected = _mm256_i32gather_ps(
113
+ tab, indices_to_read_from, sizeof(float));
114
+ tab += pq.ksub * 8;
115
+
116
+ // collect partial sums
117
+ partialSum = _mm256_add_ps(partialSum, collected);
118
+ }
119
+ }
120
+
121
+ // horizontal sum for partialSum
122
+ result += horizontal_sum(partialSum);
123
+ }
124
+
125
+ //
126
+ if (m < pq.M) {
127
+ // process leftovers
128
+ PQDecoder8 decoder(code + m, pq.nbits);
129
+
130
+ for (; m < pq.M; m++) {
131
+ result += tab[decoder.decode()];
132
+ tab += pq.ksub;
133
+ }
134
+ }
135
+
136
+ return result;
137
+ }
138
+
139
+ template <typename PQDecoderT>
140
+ typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, void>::
141
+ type
142
+ distance_four_codes_avx2(
143
+ // the product quantizer
144
+ const ProductQuantizer& pq,
145
+ // precomputed distances, layout (M, ksub)
146
+ const float* sim_table,
147
+ // codes
148
+ const uint8_t* __restrict code0,
149
+ const uint8_t* __restrict code1,
150
+ const uint8_t* __restrict code2,
151
+ const uint8_t* __restrict code3,
152
+ // computed distances
153
+ float& result0,
154
+ float& result1,
155
+ float& result2,
156
+ float& result3) {
157
+ distance_four_codes_generic<PQDecoderT>(
158
+ pq,
159
+ sim_table,
160
+ code0,
161
+ code1,
162
+ code2,
163
+ code3,
164
+ result0,
165
+ result1,
166
+ result2,
167
+ result3);
168
+ }
169
+
170
+ // Combines 4 operations of distance_single_code()
171
+ template <typename PQDecoderT>
172
+ typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, void>::type
173
+ distance_four_codes_avx2(
174
+ // the product quantizer
175
+ const ProductQuantizer& pq,
176
+ // precomputed distances, layout (M, ksub)
177
+ const float* sim_table,
178
+ // codes
179
+ const uint8_t* __restrict code0,
180
+ const uint8_t* __restrict code1,
181
+ const uint8_t* __restrict code2,
182
+ const uint8_t* __restrict code3,
183
+ // computed distances
184
+ float& result0,
185
+ float& result1,
186
+ float& result2,
187
+ float& result3) {
188
+ result0 = 0;
189
+ result1 = 0;
190
+ result2 = 0;
191
+ result3 = 0;
192
+
193
+ size_t m = 0;
194
+ const size_t pqM16 = pq.M / 16;
195
+
196
+ constexpr intptr_t N = 4;
197
+
198
+ const float* tab = sim_table;
199
+
200
+ if (pqM16 > 0) {
201
+ // process 16 values per loop
202
+ const __m256i ksub = _mm256_set1_epi32(pq.ksub);
203
+ __m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
204
+ offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
205
+
206
+ // accumulators of partial sums
207
+ __m256 partialSums[N];
208
+ for (intptr_t j = 0; j < N; j++) {
209
+ partialSums[j] = _mm256_setzero_ps();
210
+ }
211
+
212
+ // loop
213
+ for (m = 0; m < pqM16 * 16; m += 16) {
214
+ // load 16 uint8 values
215
+ __m128i mm1[N];
216
+ mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
217
+ mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
218
+ mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
219
+ mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
220
+
221
+ // process first 8 codes
222
+ for (intptr_t j = 0; j < N; j++) {
223
+ // convert uint8 values (low part of __m128i) to int32
224
+ // values
225
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm1[j]);
226
+
227
+ // add offsets
228
+ const __m256i indices_to_read_from =
229
+ _mm256_add_epi32(idx1, offsets_0);
230
+
231
+ // gather 8 values, similar to 8 operations of tab[idx]
232
+ __m256 collected = _mm256_i32gather_ps(
233
+ tab, indices_to_read_from, sizeof(float));
234
+
235
+ // collect partial sums
236
+ partialSums[j] = _mm256_add_ps(partialSums[j], collected);
237
+ }
238
+ tab += pq.ksub * 8;
239
+
240
+ // process next 8 codes
241
+ for (intptr_t j = 0; j < N; j++) {
242
+ // move high 8 uint8 to low ones
243
+ const __m128i mm2 =
244
+ _mm_unpackhi_epi64(mm1[j], _mm_setzero_si128());
245
+
246
+ // convert uint8 values (low part of __m128i) to int32
247
+ // values
248
+ const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
249
+
250
+ // add offsets
251
+ const __m256i indices_to_read_from =
252
+ _mm256_add_epi32(idx1, offsets_0);
253
+
254
+ // gather 8 values, similar to 8 operations of tab[idx]
255
+ __m256 collected = _mm256_i32gather_ps(
256
+ tab, indices_to_read_from, sizeof(float));
257
+
258
+ // collect partial sums
259
+ partialSums[j] = _mm256_add_ps(partialSums[j], collected);
260
+ }
261
+
262
+ tab += pq.ksub * 8;
263
+ }
264
+
265
+ // horizontal sum for partialSum
266
+ result0 += horizontal_sum(partialSums[0]);
267
+ result1 += horizontal_sum(partialSums[1]);
268
+ result2 += horizontal_sum(partialSums[2]);
269
+ result3 += horizontal_sum(partialSums[3]);
270
+ }
271
+
272
+ //
273
+ if (m < pq.M) {
274
+ // process leftovers
275
+ PQDecoder8 decoder0(code0 + m, pq.nbits);
276
+ PQDecoder8 decoder1(code1 + m, pq.nbits);
277
+ PQDecoder8 decoder2(code2 + m, pq.nbits);
278
+ PQDecoder8 decoder3(code3 + m, pq.nbits);
279
+ for (; m < pq.M; m++) {
280
+ result0 += tab[decoder0.decode()];
281
+ result1 += tab[decoder1.decode()];
282
+ result2 += tab[decoder2.decode()];
283
+ result3 += tab[decoder3.decode()];
284
+ tab += pq.ksub;
285
+ }
286
+ }
287
+ }
288
+
289
+ } // namespace faiss
290
+
291
+ #endif
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/ProductQuantizer.h>
11
+
12
+ namespace faiss {
13
+
14
+ /// Returns the distance to a single code.
15
+ template <typename PQDecoderT>
16
+ inline float distance_single_code_generic(
17
+ // the product quantizer
18
+ const ProductQuantizer& pq,
19
+ // precomputed distances, layout (M, ksub)
20
+ const float* sim_table,
21
+ // the code
22
+ const uint8_t* code) {
23
+ PQDecoderT decoder(code, pq.nbits);
24
+
25
+ const float* tab = sim_table;
26
+ float result = 0;
27
+
28
+ for (size_t m = 0; m < pq.M; m++) {
29
+ result += tab[decoder.decode()];
30
+ tab += pq.ksub;
31
+ }
32
+
33
+ return result;
34
+ }
35
+
36
+ /// Combines 4 operations of distance_single_code()
37
+ /// General-purpose version.
38
+ template <typename PQDecoderT>
39
+ inline void distance_four_codes_generic(
40
+ // the product quantizer
41
+ const ProductQuantizer& pq,
42
+ // precomputed distances, layout (M, ksub)
43
+ const float* sim_table,
44
+ // codes
45
+ const uint8_t* __restrict code0,
46
+ const uint8_t* __restrict code1,
47
+ const uint8_t* __restrict code2,
48
+ const uint8_t* __restrict code3,
49
+ // computed distances
50
+ float& result0,
51
+ float& result1,
52
+ float& result2,
53
+ float& result3) {
54
+ PQDecoderT decoder0(code0, pq.nbits);
55
+ PQDecoderT decoder1(code1, pq.nbits);
56
+ PQDecoderT decoder2(code2, pq.nbits);
57
+ PQDecoderT decoder3(code3, pq.nbits);
58
+
59
+ const float* tab = sim_table;
60
+ result0 = 0;
61
+ result1 = 0;
62
+ result2 = 0;
63
+ result3 = 0;
64
+
65
+ for (size_t m = 0; m < pq.M; m++) {
66
+ result0 += tab[decoder0.decode()];
67
+ result1 += tab[decoder1.decode()];
68
+ result2 += tab[decoder2.decode()];
69
+ result3 += tab[decoder3.decode()];
70
+ tab += pq.ksub;
71
+ }
72
+ }
73
+
74
+ } // namespace faiss
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Copyright (c) Facebook, Inc. and its affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/platform_macros.h>
11
+
12
+ // This directory contains functions to compute a distance
13
+ // from a given PQ code to a query vector, given that the
14
+ // distances to a query vector for pq.M codebooks are precomputed.
15
+ //
16
+ // The code was originally part of IndexIVFPQ.cpp.
17
+ // The baseline implementation can be found in
18
+ // code_distance-generic.h, distance_single_code_generic().
19
+
20
+ // The reason for this somewhat unusual structure is that
21
+ // custom implementations may need to fall back to the generic
22
+ // implementation in certain cases. So, say, avx2 header file
23
+ // needs to reference the generic header file. This is
24
+ // why the names of the functions for custom implementations
25
+ // have this _generic or _avx2 suffix.
26
+
27
+ #ifdef __AVX2__
28
+
29
+ #include <faiss/impl/code_distance/code_distance-avx2.h>
30
+
31
+ namespace faiss {
32
+
33
+ template <typename PQDecoderT>
34
+ inline float distance_single_code(
35
+ // the product quantizer
36
+ const ProductQuantizer& pq,
37
+ // precomputed distances, layout (M, ksub)
38
+ const float* sim_table,
39
+ // the code
40
+ const uint8_t* code) {
41
+ return distance_single_code_avx2<PQDecoderT>(pq, sim_table, code);
42
+ }
43
+
44
+ template <typename PQDecoderT>
45
+ inline void distance_four_codes(
46
+ // the product quantizer
47
+ const ProductQuantizer& pq,
48
+ // precomputed distances, layout (M, ksub)
49
+ const float* sim_table,
50
+ // codes
51
+ const uint8_t* __restrict code0,
52
+ const uint8_t* __restrict code1,
53
+ const uint8_t* __restrict code2,
54
+ const uint8_t* __restrict code3,
55
+ // computed distances
56
+ float& result0,
57
+ float& result1,
58
+ float& result2,
59
+ float& result3) {
60
+ distance_four_codes_avx2<PQDecoderT>(
61
+ pq,
62
+ sim_table,
63
+ code0,
64
+ code1,
65
+ code2,
66
+ code3,
67
+ result0,
68
+ result1,
69
+ result2,
70
+ result3);
71
+ }
72
+
73
+ } // namespace faiss
74
+
75
+ #else
76
+
77
+ #include <faiss/impl/code_distance/code_distance-generic.h>
78
+
79
+ namespace faiss {
80
+
81
+ template <typename PQDecoderT>
82
+ inline float distance_single_code(
83
+ // the product quantizer
84
+ const ProductQuantizer& pq,
85
+ // precomputed distances, layout (M, ksub)
86
+ const float* sim_table,
87
+ // the code
88
+ const uint8_t* code) {
89
+ return distance_single_code_generic<PQDecoderT>(pq, sim_table, code);
90
+ }
91
+
92
+ template <typename PQDecoderT>
93
+ inline void distance_four_codes(
94
+ // the product quantizer
95
+ const ProductQuantizer& pq,
96
+ // precomputed distances, layout (M, ksub)
97
+ const float* sim_table,
98
+ // codes
99
+ const uint8_t* __restrict code0,
100
+ const uint8_t* __restrict code1,
101
+ const uint8_t* __restrict code2,
102
+ const uint8_t* __restrict code3,
103
+ // computed distances
104
+ float& result0,
105
+ float& result1,
106
+ float& result2,
107
+ float& result3) {
108
+ distance_four_codes_generic<PQDecoderT>(
109
+ pq,
110
+ sim_table,
111
+ code0,
112
+ code1,
113
+ code2,
114
+ code3,
115
+ result0,
116
+ result1,
117
+ result2,
118
+ result3);
119
+ }
120
+
121
+ } // namespace faiss
122
+
123
+ #endif