faiss 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE.txt +1 -1
  4. data/ext/faiss/ext.cpp +1 -1
  5. data/ext/faiss/extconf.rb +5 -6
  6. data/ext/faiss/index_binary.cpp +76 -17
  7. data/ext/faiss/{index.cpp → index_rb.cpp} +108 -35
  8. data/ext/faiss/kmeans.cpp +12 -9
  9. data/ext/faiss/numo.hpp +11 -9
  10. data/ext/faiss/pca_matrix.cpp +10 -8
  11. data/ext/faiss/product_quantizer.cpp +14 -12
  12. data/ext/faiss/{utils.cpp → utils_rb.cpp} +10 -3
  13. data/ext/faiss/{utils.h → utils_rb.h} +6 -0
  14. data/lib/faiss/version.rb +1 -1
  15. data/lib/faiss.rb +1 -1
  16. data/vendor/faiss/faiss/AutoTune.cpp +130 -11
  17. data/vendor/faiss/faiss/AutoTune.h +14 -1
  18. data/vendor/faiss/faiss/Clustering.cpp +59 -10
  19. data/vendor/faiss/faiss/Clustering.h +12 -0
  20. data/vendor/faiss/faiss/IVFlib.cpp +31 -28
  21. data/vendor/faiss/faiss/Index.cpp +20 -8
  22. data/vendor/faiss/faiss/Index.h +25 -3
  23. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +19 -24
  24. data/vendor/faiss/faiss/IndexBinary.cpp +1 -0
  25. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +9 -4
  26. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +45 -11
  27. data/vendor/faiss/faiss/IndexFastScan.cpp +35 -22
  28. data/vendor/faiss/faiss/IndexFastScan.h +10 -1
  29. data/vendor/faiss/faiss/IndexFlat.cpp +193 -136
  30. data/vendor/faiss/faiss/IndexFlat.h +16 -1
  31. data/vendor/faiss/faiss/IndexFlatCodes.cpp +46 -22
  32. data/vendor/faiss/faiss/IndexFlatCodes.h +7 -1
  33. data/vendor/faiss/faiss/IndexHNSW.cpp +24 -50
  34. data/vendor/faiss/faiss/IndexHNSW.h +14 -12
  35. data/vendor/faiss/faiss/IndexIDMap.cpp +1 -1
  36. data/vendor/faiss/faiss/IndexIVF.cpp +76 -49
  37. data/vendor/faiss/faiss/IndexIVF.h +14 -4
  38. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +11 -8
  39. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +2 -2
  40. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +25 -14
  41. data/vendor/faiss/faiss/IndexIVFFastScan.h +26 -22
  42. data/vendor/faiss/faiss/IndexIVFFlat.cpp +10 -61
  43. data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +39 -111
  44. data/vendor/faiss/faiss/IndexIVFPQ.cpp +89 -147
  45. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +37 -5
  46. data/vendor/faiss/faiss/IndexIVFPQR.cpp +2 -1
  47. data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +42 -30
  48. data/vendor/faiss/faiss/IndexIVFRaBitQ.h +2 -2
  49. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +246 -97
  50. data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +32 -29
  51. data/vendor/faiss/faiss/IndexLSH.cpp +8 -6
  52. data/vendor/faiss/faiss/IndexLattice.cpp +29 -24
  53. data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -0
  54. data/vendor/faiss/faiss/IndexNSG.cpp +2 -1
  55. data/vendor/faiss/faiss/IndexNSG.h +0 -2
  56. data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +1 -1
  57. data/vendor/faiss/faiss/IndexPQ.cpp +19 -10
  58. data/vendor/faiss/faiss/IndexRaBitQ.cpp +26 -13
  59. data/vendor/faiss/faiss/IndexRaBitQ.h +2 -2
  60. data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +132 -78
  61. data/vendor/faiss/faiss/IndexRaBitQFastScan.h +14 -12
  62. data/vendor/faiss/faiss/IndexRefine.cpp +0 -30
  63. data/vendor/faiss/faiss/IndexShards.cpp +3 -4
  64. data/vendor/faiss/faiss/MetricType.h +16 -0
  65. data/vendor/faiss/faiss/VectorTransform.cpp +120 -0
  66. data/vendor/faiss/faiss/VectorTransform.h +23 -0
  67. data/vendor/faiss/faiss/clone_index.cpp +7 -4
  68. data/vendor/faiss/faiss/{cppcontrib/factory_tools.cpp → factory_tools.cpp} +1 -1
  69. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
  70. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +37 -11
  71. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -28
  72. data/vendor/faiss/faiss/impl/ClusteringInitialization.cpp +367 -0
  73. data/vendor/faiss/faiss/impl/ClusteringInitialization.h +107 -0
  74. data/vendor/faiss/faiss/impl/CodePacker.cpp +4 -0
  75. data/vendor/faiss/faiss/impl/CodePacker.h +11 -3
  76. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.cpp +83 -0
  77. data/vendor/faiss/faiss/impl/CodePackerRaBitQ.h +47 -0
  78. data/vendor/faiss/faiss/impl/FaissAssert.h +60 -2
  79. data/vendor/faiss/faiss/impl/HNSW.cpp +25 -34
  80. data/vendor/faiss/faiss/impl/HNSW.h +8 -6
  81. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +34 -27
  82. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
  83. data/vendor/faiss/faiss/impl/NSG.cpp +6 -5
  84. data/vendor/faiss/faiss/impl/NSG.h +17 -7
  85. data/vendor/faiss/faiss/impl/Panorama.cpp +53 -46
  86. data/vendor/faiss/faiss/impl/Panorama.h +22 -6
  87. data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +16 -5
  88. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +70 -58
  89. data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +92 -0
  90. data/vendor/faiss/faiss/impl/RaBitQUtils.h +93 -31
  91. data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +12 -28
  92. data/vendor/faiss/faiss/impl/RaBitQuantizer.h +3 -10
  93. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.cpp +15 -41
  94. data/vendor/faiss/faiss/impl/RaBitQuantizerMultiBit.h +0 -4
  95. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +14 -9
  96. data/vendor/faiss/faiss/impl/ResultHandler.h +131 -50
  97. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +67 -2358
  98. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +0 -2
  99. data/vendor/faiss/faiss/impl/VisitedTable.cpp +42 -0
  100. data/vendor/faiss/faiss/impl/VisitedTable.h +69 -0
  101. data/vendor/faiss/faiss/impl/expanded_scanners.h +158 -0
  102. data/vendor/faiss/faiss/impl/index_read.cpp +829 -471
  103. data/vendor/faiss/faiss/impl/index_read_utils.h +0 -1
  104. data/vendor/faiss/faiss/impl/index_write.cpp +17 -8
  105. data/vendor/faiss/faiss/impl/lattice_Zn.cpp +47 -20
  106. data/vendor/faiss/faiss/impl/mapped_io.cpp +9 -2
  107. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +7 -2
  108. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +11 -3
  109. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +19 -13
  110. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +29 -21
  111. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx2.h → pq_code_distance/pq_code_distance-avx2.cpp} +42 -215
  112. data/vendor/faiss/faiss/impl/{code_distance/code_distance-avx512.h → pq_code_distance/pq_code_distance-avx512.cpp} +68 -107
  113. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-generic.cpp +141 -0
  114. data/vendor/faiss/faiss/impl/pq_code_distance/pq_code_distance-inl.h +23 -0
  115. data/vendor/faiss/faiss/impl/{code_distance/code_distance-sve.h → pq_code_distance/pq_code_distance-sve.cpp} +57 -144
  116. data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +9 -6
  117. data/vendor/faiss/faiss/impl/scalar_quantizer/codecs.h +121 -0
  118. data/vendor/faiss/faiss/impl/scalar_quantizer/distance_computers.h +136 -0
  119. data/vendor/faiss/faiss/impl/scalar_quantizer/quantizers.h +280 -0
  120. data/vendor/faiss/faiss/impl/scalar_quantizer/scanners.h +164 -0
  121. data/vendor/faiss/faiss/impl/scalar_quantizer/similarities.h +94 -0
  122. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx2.cpp +455 -0
  123. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-avx512.cpp +430 -0
  124. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-dispatch.h +329 -0
  125. data/vendor/faiss/faiss/impl/scalar_quantizer/sq-neon.cpp +467 -0
  126. data/vendor/faiss/faiss/impl/scalar_quantizer/training.cpp +203 -0
  127. data/vendor/faiss/faiss/impl/scalar_quantizer/training.h +42 -0
  128. data/vendor/faiss/faiss/impl/simd_dispatch.h +139 -0
  129. data/vendor/faiss/faiss/impl/simd_result_handlers.h +18 -18
  130. data/vendor/faiss/faiss/index_factory.cpp +35 -16
  131. data/vendor/faiss/faiss/index_io.h +29 -3
  132. data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +7 -4
  133. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +1 -1
  134. data/vendor/faiss/faiss/svs/IndexSVSFaissUtils.h +9 -19
  135. data/vendor/faiss/faiss/svs/IndexSVSFlat.h +2 -0
  136. data/vendor/faiss/faiss/svs/IndexSVSVamana.h +2 -1
  137. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.cpp +9 -1
  138. data/vendor/faiss/faiss/svs/IndexSVSVamanaLeanVec.h +9 -0
  139. data/vendor/faiss/faiss/utils/Heap.cpp +46 -0
  140. data/vendor/faiss/faiss/utils/Heap.h +21 -0
  141. data/vendor/faiss/faiss/utils/NeuralNet.cpp +10 -7
  142. data/vendor/faiss/faiss/utils/distances.cpp +141 -23
  143. data/vendor/faiss/faiss/utils/distances.h +98 -0
  144. data/vendor/faiss/faiss/utils/distances_dispatch.h +170 -0
  145. data/vendor/faiss/faiss/utils/distances_simd.cpp +74 -3511
  146. data/vendor/faiss/faiss/utils/extra_distances-inl.h +164 -157
  147. data/vendor/faiss/faiss/utils/extra_distances.cpp +52 -95
  148. data/vendor/faiss/faiss/utils/extra_distances.h +47 -1
  149. data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +0 -1
  150. data/vendor/faiss/faiss/utils/partitioning.cpp +1 -1
  151. data/vendor/faiss/faiss/utils/pq_code_distance.h +251 -0
  152. data/vendor/faiss/faiss/utils/rabitq_simd.h +260 -0
  153. data/vendor/faiss/faiss/utils/simd_impl/distances_aarch64.cpp +150 -0
  154. data/vendor/faiss/faiss/utils/simd_impl/distances_arm_sve.cpp +568 -0
  155. data/vendor/faiss/faiss/utils/simd_impl/distances_autovec-inl.h +153 -0
  156. data/vendor/faiss/faiss/utils/simd_impl/distances_avx2.cpp +1185 -0
  157. data/vendor/faiss/faiss/utils/simd_impl/distances_avx512.cpp +1092 -0
  158. data/vendor/faiss/faiss/utils/simd_impl/distances_sse-inl.h +391 -0
  159. data/vendor/faiss/faiss/utils/simd_levels.cpp +322 -0
  160. data/vendor/faiss/faiss/utils/simd_levels.h +91 -0
  161. data/vendor/faiss/faiss/utils/simdlib_avx2.h +12 -1
  162. data/vendor/faiss/faiss/utils/simdlib_avx512.h +69 -0
  163. data/vendor/faiss/faiss/utils/simdlib_neon.h +6 -0
  164. data/vendor/faiss/faiss/utils/sorting.cpp +4 -4
  165. data/vendor/faiss/faiss/utils/utils.cpp +16 -9
  166. metadata +47 -18
  167. data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +0 -81
  168. data/vendor/faiss/faiss/impl/code_distance/code_distance.h +0 -186
  169. /data/vendor/faiss/faiss/{cppcontrib/factory_tools.h → factory_tools.h} +0 -0
@@ -5,64 +5,34 @@
5
5
  * LICENSE file in the root directory of this source tree.
6
6
  */
7
7
 
8
- #pragma once
9
-
10
- #ifdef __ARM_FEATURE_SVE
8
+ #ifdef COMPILE_SIMD_ARM_SVE
11
9
 
12
10
  #include <arm_sve.h>
13
11
 
14
- #include <tuple>
15
- #include <type_traits>
16
-
17
- #include <faiss/impl/ProductQuantizer.h>
18
- #include <faiss/impl/code_distance/code_distance-generic.h>
19
-
20
- namespace faiss {
12
+ #include <faiss/impl/pq_code_distance/pq_code_distance-inl.h>
21
13
 
22
- template <typename PQDecoderT>
23
- std::enable_if_t<!std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_single_code_sve(
24
- // the product quantizer
25
- const size_t M,
26
- // number of bits per quantization index
27
- const size_t nbits,
28
- // precomputed distances, layout (M, ksub)
29
- const float* sim_table,
30
- const uint8_t* code) {
31
- // default implementation
32
- return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
33
- }
14
+ namespace {
34
15
 
35
- static inline void distance_codes_kernel(
16
+ inline void distance_codes_kernel(
36
17
  svbool_t pg,
37
18
  svuint32_t idx1,
38
19
  svuint32_t offsets_0,
39
20
  const float* tab,
40
21
  svfloat32_t& partialSum) {
41
- // add offset
42
22
  const auto indices_to_read_from = svadd_u32_x(pg, idx1, offsets_0);
43
-
44
- // gather values, similar to some operations of tab[index]
45
23
  const auto collected =
46
24
  svld1_gather_u32index_f32(pg, tab, indices_to_read_from);
47
-
48
- // collect partial sum
49
25
  partialSum = svadd_f32_m(pg, partialSum, collected);
50
26
  }
51
27
 
52
- static inline float distance_single_code_sve_for_small_m(
53
- // the product quantizer
28
+ inline float distance_single_code_sve_for_small_m(
54
29
  const size_t M,
55
- // precomputed distances, layout (M, ksub)
56
30
  const float* sim_table,
57
- // codes
58
31
  const uint8_t* __restrict code) {
59
32
  constexpr size_t nbits = 8u;
60
-
61
33
  const size_t ksub = 1 << nbits;
62
34
 
63
35
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
64
-
65
- // loop
66
36
  const auto pg = svwhilelt_b32_u64(0, M);
67
37
 
68
38
  auto mm1 = svld1ub_u32(pg, code);
@@ -71,44 +41,75 @@ static inline float distance_single_code_sve_for_small_m(
71
41
  return svaddv_f32(pg, collected0);
72
42
  }
73
43
 
74
- template <typename PQDecoderT>
75
- std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_single_code_sve(
76
- // the product quantizer
44
+ inline void distance_four_codes_sve_for_small_m(
77
45
  const size_t M,
78
- // number of bits per quantization index
79
- const size_t nbits,
80
- // precomputed distances, layout (M, ksub)
46
+ const float* sim_table,
47
+ const uint8_t* __restrict code0,
48
+ const uint8_t* __restrict code1,
49
+ const uint8_t* __restrict code2,
50
+ const uint8_t* __restrict code3,
51
+ float& result0,
52
+ float& result1,
53
+ float& result2,
54
+ float& result3) {
55
+ constexpr size_t nbits = 8u;
56
+ const size_t ksub = 1 << nbits;
57
+
58
+ const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
59
+ const auto pg = svwhilelt_b32_u64(0, M);
60
+
61
+ auto mm10 = svld1ub_u32(pg, code0);
62
+ auto mm11 = svld1ub_u32(pg, code1);
63
+ auto mm12 = svld1ub_u32(pg, code2);
64
+ auto mm13 = svld1ub_u32(pg, code3);
65
+ mm10 = svadd_u32_x(pg, mm10, offsets_0);
66
+ mm11 = svadd_u32_x(pg, mm11, offsets_0);
67
+ mm12 = svadd_u32_x(pg, mm12, offsets_0);
68
+ mm13 = svadd_u32_x(pg, mm13, offsets_0);
69
+ const auto collected0 = svld1_gather_u32index_f32(pg, sim_table, mm10);
70
+ const auto collected1 = svld1_gather_u32index_f32(pg, sim_table, mm11);
71
+ const auto collected2 = svld1_gather_u32index_f32(pg, sim_table, mm12);
72
+ const auto collected3 = svld1_gather_u32index_f32(pg, sim_table, mm13);
73
+ result0 = svaddv_f32(pg, collected0);
74
+ result1 = svaddv_f32(pg, collected1);
75
+ result2 = svaddv_f32(pg, collected2);
76
+ result3 = svaddv_f32(pg, collected3);
77
+ }
78
+
79
+ } // namespace
80
+
81
+ namespace faiss {
82
+ namespace pq_code_distance {
83
+
84
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
85
+ template <>
86
+ float pq_code_distance_single_impl<SIMDLevel::ARM_SVE>(
87
+ size_t M,
88
+ size_t nbits,
81
89
  const float* sim_table,
82
90
  const uint8_t* code) {
83
91
  if (M <= svcntw())
84
92
  return distance_single_code_sve_for_small_m(M, sim_table, code);
85
93
 
86
94
  const float* tab = sim_table;
87
-
88
95
  const size_t ksub = 1 << nbits;
89
96
 
90
97
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
91
-
92
- // accumulators of partial sums
93
98
  auto partialSum = svdup_n_f32(0.f);
94
99
 
95
100
  const auto lanes = svcntb();
96
101
  const auto quad_lanes = lanes / 4;
97
102
 
98
- // loop
99
103
  for (std::size_t m = 0; m < M;) {
100
104
  const auto pg = svwhilelt_b8_u64(m, M);
101
-
102
105
  const auto mm1 = svld1_u8(pg, code + m);
103
106
  {
104
107
  const auto mm1lo = svunpklo_u16(mm1);
105
108
  const auto pglo = svunpklo_b(pg);
106
109
 
107
110
  {
108
- // convert uint8 values to uint32 values
109
111
  const auto idx1 = svunpklo_u32(mm1lo);
110
112
  const auto pglolo = svunpklo_b(pglo);
111
-
112
113
  distance_codes_kernel(pglolo, idx1, offsets_0, tab, partialSum);
113
114
  tab += ksub * quad_lanes;
114
115
  }
@@ -118,10 +119,8 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
118
119
  break;
119
120
 
120
121
  {
121
- // convert uint8 values to uint32 values
122
122
  const auto idx1 = svunpkhi_u32(mm1lo);
123
123
  const auto pglohi = svunpkhi_b(pglo);
124
-
125
124
  distance_codes_kernel(pglohi, idx1, offsets_0, tab, partialSum);
126
125
  tab += ksub * quad_lanes;
127
126
  }
@@ -136,10 +135,8 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
136
135
  const auto pghi = svunpkhi_b(pg);
137
136
 
138
137
  {
139
- // convert uint8 values to uint32 values
140
138
  const auto idx1 = svunpklo_u32(mm1hi);
141
139
  const auto pghilo = svunpklo_b(pghi);
142
-
143
140
  distance_codes_kernel(pghilo, idx1, offsets_0, tab, partialSum);
144
141
  tab += ksub * quad_lanes;
145
142
  }
@@ -149,10 +146,8 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
149
146
  break;
150
147
 
151
148
  {
152
- // convert uint8 values to uint32 values
153
149
  const auto idx1 = svunpkhi_u32(mm1hi);
154
150
  const auto pghihi = svunpkhi_b(pghi);
155
-
156
151
  distance_codes_kernel(pghihi, idx1, offsets_0, tab, partialSum);
157
152
  tab += ksub * quad_lanes;
158
153
  }
@@ -164,97 +159,17 @@ std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, float> inline distance_
164
159
  return svaddv_f32(svptrue_b32(), partialSum);
165
160
  }
166
161
 
167
- template <typename PQDecoderT>
168
- std::enable_if_t<!std::is_same_v<PQDecoderT, PQDecoder8>, void>
169
- distance_four_codes_sve(
170
- // the product quantizer
171
- const size_t M,
172
- // number of bits per quantization index
173
- const size_t nbits,
174
- // precomputed distances, layout (M, ksub)
175
- const float* sim_table,
176
- // codes
177
- const uint8_t* __restrict code0,
178
- const uint8_t* __restrict code1,
179
- const uint8_t* __restrict code2,
180
- const uint8_t* __restrict code3,
181
- // computed distances
182
- float& result0,
183
- float& result1,
184
- float& result2,
185
- float& result3) {
186
- distance_four_codes_generic<PQDecoderT>(
187
- M,
188
- nbits,
189
- sim_table,
190
- code0,
191
- code1,
192
- code2,
193
- code3,
194
- result0,
195
- result1,
196
- result2,
197
- result3);
198
- }
199
-
200
- static inline void distance_four_codes_sve_for_small_m(
201
- // the product quantizer
202
- const size_t M,
203
- // precomputed distances, layout (M, ksub)
204
- const float* sim_table,
205
- // codes
206
- const uint8_t* __restrict code0,
207
- const uint8_t* __restrict code1,
208
- const uint8_t* __restrict code2,
209
- const uint8_t* __restrict code3,
210
- // computed distances
211
- float& result0,
212
- float& result1,
213
- float& result2,
214
- float& result3) {
215
- constexpr size_t nbits = 8u;
216
-
217
- const size_t ksub = 1 << nbits;
218
-
219
- const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
220
-
221
- // loop
222
- const auto pg = svwhilelt_b32_u64(0, M);
223
-
224
- auto mm10 = svld1ub_u32(pg, code0);
225
- auto mm11 = svld1ub_u32(pg, code1);
226
- auto mm12 = svld1ub_u32(pg, code2);
227
- auto mm13 = svld1ub_u32(pg, code3);
228
- mm10 = svadd_u32_x(pg, mm10, offsets_0);
229
- mm11 = svadd_u32_x(pg, mm11, offsets_0);
230
- mm12 = svadd_u32_x(pg, mm12, offsets_0);
231
- mm13 = svadd_u32_x(pg, mm13, offsets_0);
232
- const auto collected0 = svld1_gather_u32index_f32(pg, sim_table, mm10);
233
- const auto collected1 = svld1_gather_u32index_f32(pg, sim_table, mm11);
234
- const auto collected2 = svld1_gather_u32index_f32(pg, sim_table, mm12);
235
- const auto collected3 = svld1_gather_u32index_f32(pg, sim_table, mm13);
236
- result0 = svaddv_f32(pg, collected0);
237
- result1 = svaddv_f32(pg, collected1);
238
- result2 = svaddv_f32(pg, collected2);
239
- result3 = svaddv_f32(pg, collected3);
240
- }
241
-
242
- // Combines 4 operations of distance_single_code()
243
- template <typename PQDecoderT>
244
- std::enable_if_t<std::is_same_v<PQDecoderT, PQDecoder8>, void>
245
- distance_four_codes_sve(
246
- // the product quantizer
247
- const size_t M,
248
- // number of bits per quantization index
249
- const size_t nbits,
250
- // precomputed distances, layout (M, ksub)
162
+ // Combines 4 operations of pq_code_distance_single_impl().
163
+ // NOLINTNEXTLINE(facebook-hte-MisplacedTemplateSpecialization)
164
+ template <>
165
+ void pq_code_distance_four_impl<SIMDLevel::ARM_SVE>(
166
+ size_t M,
167
+ size_t nbits,
251
168
  const float* sim_table,
252
- // codes
253
169
  const uint8_t* __restrict code0,
254
170
  const uint8_t* __restrict code1,
255
171
  const uint8_t* __restrict code2,
256
172
  const uint8_t* __restrict code3,
257
- // computed distances
258
173
  float& result0,
259
174
  float& result1,
260
175
  float& result2,
@@ -275,12 +190,10 @@ distance_four_codes_sve(
275
190
  }
276
191
 
277
192
  const float* tab = sim_table;
278
-
279
193
  const size_t ksub = 1 << nbits;
280
194
 
281
195
  const auto offsets_0 = svindex_u32(0, static_cast<uint32_t>(ksub));
282
196
 
283
- // accumulators of partial sums
284
197
  auto partialSum0 = svdup_n_f32(0.f);
285
198
  auto partialSum1 = svdup_n_f32(0.f);
286
199
  auto partialSum2 = svdup_n_f32(0.f);
@@ -289,7 +202,6 @@ distance_four_codes_sve(
289
202
  const auto lanes = svcntb();
290
203
  const auto quad_lanes = lanes / 4;
291
204
 
292
- // loop
293
205
  for (std::size_t m = 0; m < M;) {
294
206
  const auto pg = svwhilelt_b8_u64(m, M);
295
207
 
@@ -434,6 +346,7 @@ distance_four_codes_sve(
434
346
  result3 = svaddv_f32(svptrue_b32(), partialSum3);
435
347
  }
436
348
 
349
+ } // namespace pq_code_distance
437
350
  } // namespace faiss
438
351
 
439
- #endif
352
+ #endif // COMPILE_SIMD_ARM_SVE
@@ -10,6 +10,7 @@
10
10
  #include <faiss/impl/AuxIndexStructures.h>
11
11
  #include <faiss/impl/FaissAssert.h>
12
12
  #include <faiss/impl/ResidualQuantizer.h>
13
+ #include <faiss/impl/simd_dispatch.h>
13
14
  #include <faiss/utils/Heap.h>
14
15
  #include <faiss/utils/distances.h>
15
16
  #include <faiss/utils/simdlib.h>
@@ -877,12 +878,14 @@ void compute_codes_add_centroids_mp_lut0(
877
878
  pool.norms.resize(n);
878
879
  // recover the norms of reconstruction as
879
880
  // || original_vector - residual ||^2
880
- for (size_t i = 0; i < n; i++) {
881
- pool.norms[i] = fvec_L2sqr(
882
- x + i * rq.d,
883
- pool.residuals.data() + i * rq.max_beam_size * rq.d,
884
- rq.d);
885
- }
881
+ with_simd_level([&]<SIMDLevel SL>() {
882
+ for (size_t i = 0; i < n; i++) {
883
+ pool.norms[i] = fvec_L2sqr<SL>(
884
+ x + i * rq.d,
885
+ pool.residuals.data() + i * rq.max_beam_size * rq.d,
886
+ rq.d);
887
+ }
888
+ });
886
889
  }
887
890
 
888
891
  // pack only the first code of the beam
@@ -0,0 +1,121 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/ScalarQuantizer.h>
11
+ #include <faiss/utils/simd_levels.h>
12
+ #include <faiss/utils/simdlib.h>
13
+
14
+ namespace faiss {
15
+
16
+ namespace scalar_quantizer {
17
+
18
+ /*******************************************************************
19
+ * Codec: converts between values in [0, 1] and an index in a code
20
+ * array. The "i" parameter is the vector component index (not byte
21
+ * index).
22
+ */
23
+
24
+ template <SIMDLevel SL>
25
+ struct Codec8bit {};
26
+
27
+ template <>
28
+ struct Codec8bit<SIMDLevel::NONE> {
29
+ static FAISS_ALWAYS_INLINE void encode_component(
30
+ float x,
31
+ uint8_t* code,
32
+ size_t i) {
33
+ code[i] = (int)(255 * x);
34
+ }
35
+
36
+ static FAISS_ALWAYS_INLINE float decode_component(
37
+ const uint8_t* code,
38
+ size_t i) {
39
+ return (code[i] + 0.5f) / 255.0f;
40
+ }
41
+ };
42
+
43
+ template <SIMDLevel SL>
44
+ struct Codec4bit {};
45
+
46
+ template <>
47
+ struct Codec4bit<SIMDLevel::NONE> {
48
+ static FAISS_ALWAYS_INLINE void encode_component(
49
+ float x,
50
+ uint8_t* code,
51
+ size_t i) {
52
+ code[i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
53
+ }
54
+
55
+ static FAISS_ALWAYS_INLINE float decode_component(
56
+ const uint8_t* code,
57
+ size_t i) {
58
+ return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
59
+ }
60
+ };
61
+
62
+ template <SIMDLevel SL>
63
+ struct Codec6bit {};
64
+
65
+ template <>
66
+ struct Codec6bit<SIMDLevel::NONE> {
67
+ static FAISS_ALWAYS_INLINE void encode_component(
68
+ float x,
69
+ uint8_t* code,
70
+ size_t i) {
71
+ int bits = (int)(x * 63.0);
72
+ code += (i >> 2) * 3;
73
+ switch (i & 3) {
74
+ case 0:
75
+ code[0] |= bits;
76
+ break;
77
+ case 1:
78
+ code[0] |= bits << 6;
79
+ code[1] |= bits >> 2;
80
+ break;
81
+ case 2:
82
+ code[1] |= bits << 4;
83
+ code[2] |= bits >> 4;
84
+ break;
85
+ case 3:
86
+ code[2] |= bits << 2;
87
+ break;
88
+ default:
89
+ break;
90
+ }
91
+ }
92
+
93
+ static FAISS_ALWAYS_INLINE float decode_component(
94
+ const uint8_t* code,
95
+ size_t i) {
96
+ uint8_t bits = 0;
97
+ code += (i >> 2) * 3;
98
+ switch (i & 3) {
99
+ case 0:
100
+ bits = code[0] & 0x3f;
101
+ break;
102
+ case 1:
103
+ bits = code[0] >> 6;
104
+ bits |= (code[1] & 0xf) << 2;
105
+ break;
106
+ case 2:
107
+ bits = code[1] >> 4;
108
+ bits |= (code[2] & 3) << 4;
109
+ break;
110
+ case 3:
111
+ bits = code[2] >> 2;
112
+ break;
113
+ default:
114
+ break;
115
+ }
116
+ return (bits + 0.5f) / 63.0f;
117
+ }
118
+ };
119
+
120
+ } // namespace scalar_quantizer
121
+ } // namespace faiss
@@ -0,0 +1,136 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ *
4
+ * This source code is licensed under the MIT license found in the
5
+ * LICENSE file in the root directory of this source tree.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ #include <faiss/impl/ScalarQuantizer.h>
11
+ #include <faiss/utils/simd_levels.h>
12
+ #include <faiss/utils/simdlib.h>
13
+
14
+ namespace faiss {
15
+
16
+ namespace scalar_quantizer {
17
+
18
+ using SQDistanceComputer = ScalarQuantizer::SQDistanceComputer;
19
+
20
+ /*******************************************************************
21
+ * DistanceComputer: combines a similarity and a quantizer to do
22
+ * code-to-vector or code-to-code comparisons
23
+ *******************************************************************/
24
+
25
+ template <class Quantizer, class Similarity, SIMDLevel SL>
26
+ struct DCTemplate : SQDistanceComputer {};
27
+
28
+ template <class Quantizer, class Similarity>
29
+ struct DCTemplate<Quantizer, Similarity, SIMDLevel::NONE> : SQDistanceComputer {
30
+ using Sim = Similarity;
31
+
32
+ Quantizer quant;
33
+
34
+ DCTemplate(size_t d, const std::vector<float>& trained)
35
+ : quant(d, trained) {}
36
+
37
+ float compute_distance(const float* x, const uint8_t* code) const {
38
+ Similarity sim(x);
39
+ sim.begin();
40
+ for (size_t i = 0; i < quant.d; i++) {
41
+ float xi = quant.reconstruct_component(code, i);
42
+ sim.add_component(xi);
43
+ }
44
+ return sim.result();
45
+ }
46
+
47
+ float compute_code_distance(const uint8_t* code1, const uint8_t* code2)
48
+ const {
49
+ Similarity sim(nullptr);
50
+ sim.begin();
51
+ for (size_t i = 0; i < quant.d; i++) {
52
+ float x1 = quant.reconstruct_component(code1, i);
53
+ float x2 = quant.reconstruct_component(code2, i);
54
+ sim.add_component_2(x1, x2);
55
+ }
56
+ return sim.result();
57
+ }
58
+
59
+ void set_query(const float* x) final {
60
+ q = x;
61
+ }
62
+
63
+ float symmetric_dis(idx_t i, idx_t j) override {
64
+ return compute_code_distance(
65
+ codes + i * code_size, codes + j * code_size);
66
+ }
67
+
68
+ float query_to_code(const uint8_t* code) const final {
69
+ return compute_distance(q, code);
70
+ }
71
+ };
72
+
73
+ /*******************************************************************
74
+ * DistanceComputerByte: computes distances in the integer domain
75
+ *******************************************************************/
76
+
77
+ template <class Similarity, SIMDLevel SL>
78
+ struct DistanceComputerByte : SQDistanceComputer {};
79
+
80
+ template <class Similarity>
81
+ struct DistanceComputerByte<Similarity, SIMDLevel::NONE> : SQDistanceComputer {
82
+ using Sim = Similarity;
83
+
84
+ int d;
85
+ std::vector<uint8_t> tmp;
86
+
87
+ DistanceComputerByte(int d, const std::vector<float>&) : d(d), tmp(d) {}
88
+
89
+ int compute_code_distance(const uint8_t* code1, const uint8_t* code2)
90
+ const {
91
+ int accu = 0;
92
+ for (int i = 0; i < d; i++) {
93
+ if (Sim::metric_type == METRIC_INNER_PRODUCT) {
94
+ accu += int(code1[i]) * code2[i];
95
+ } else {
96
+ int diff = int(code1[i]) - code2[i];
97
+ accu += diff * diff;
98
+ }
99
+ }
100
+ return accu;
101
+ }
102
+
103
+ void set_query(const float* x) final {
104
+ for (int i = 0; i < d; i++) {
105
+ tmp[i] = int(x[i]);
106
+ }
107
+ }
108
+
109
+ int compute_distance(const float* x, const uint8_t* code) {
110
+ set_query(x);
111
+ return compute_code_distance(tmp.data(), code);
112
+ }
113
+
114
+ float symmetric_dis(idx_t i, idx_t j) override {
115
+ return compute_code_distance(
116
+ codes + i * code_size, codes + j * code_size);
117
+ }
118
+
119
+ float query_to_code(const uint8_t* code) const final {
120
+ return compute_code_distance(tmp.data(), code);
121
+ }
122
+ };
123
+
124
+ /*******************************************************************
125
+ * Selection function
126
+ *******************************************************************/
127
+
128
+ template <SIMDLevel SL>
129
+ SQDistanceComputer* sq_select_distance_computer(
130
+ MetricType metric,
131
+ ScalarQuantizer::QuantizerType qtype,
132
+ size_t d,
133
+ const std::vector<float>& trained);
134
+
135
+ } // namespace scalar_quantizer
136
+ } // namespace faiss