faiss 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +23 -21
  4. data/ext/faiss/extconf.rb +11 -0
  5. data/ext/faiss/index.cpp +4 -4
  6. data/ext/faiss/index_binary.cpp +6 -6
  7. data/ext/faiss/product_quantizer.cpp +4 -4
  8. data/lib/faiss/version.rb +1 -1
  9. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  10. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  11. data/vendor/faiss/faiss/IVFlib.h +26 -2
  12. data/vendor/faiss/faiss/Index.cpp +36 -3
  13. data/vendor/faiss/faiss/Index.h +43 -6
  14. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  15. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  20. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  23. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  31. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  32. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  33. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  34. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  35. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  39. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  40. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  41. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  42. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  43. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  48. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  50. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  51. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  53. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  56. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  57. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  58. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  60. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  61. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  62. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  63. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  66. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  67. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  68. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  69. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  73. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  74. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  75. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  76. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  78. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  80. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  82. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  83. data/vendor/faiss/faiss/IndexShards.h +2 -1
  84. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  85. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  86. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  87. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  88. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  89. data/vendor/faiss/faiss/clone_index.h +3 -0
  90. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  91. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  93. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  101. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  102. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  103. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  105. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  106. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  110. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  111. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  113. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  118. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  119. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  124. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  125. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  126. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  127. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  128. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  131. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  134. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  138. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  144. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  145. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  146. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  147. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  151. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  152. data/vendor/faiss/faiss/index_io.h +5 -0
  153. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  154. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  155. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  156. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  157. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  158. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  159. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  160. data/vendor/faiss/faiss/utils/distances.h +113 -15
  161. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  162. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  163. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  164. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  165. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  166. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  167. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  168. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  169. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  170. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  172. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  173. data/vendor/faiss/faiss/utils/random.h +5 -0
  174. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  175. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  176. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  177. metadata +37 -3
@@ -14,6 +14,7 @@
14
14
  #include <cmath>
15
15
 
16
16
  #include <faiss/impl/AuxIndexStructures.h>
17
+ #include <faiss/impl/DistanceComputer.h>
17
18
  #include <faiss/impl/FaissAssert.h>
18
19
  #include <faiss/utils/utils.h>
19
20
 
@@ -89,18 +90,18 @@ void knn_extra_metrics_template(
89
90
  }
90
91
 
91
92
  template <class VD>
92
- struct ExtraDistanceComputer : DistanceComputer {
93
+ struct ExtraDistanceComputer : FlatCodesDistanceComputer {
93
94
  VD vd;
94
95
  Index::idx_t nb;
95
96
  const float* q;
96
97
  const float* b;
97
98
 
98
- float operator()(idx_t i) override {
99
- return vd(q, b + i * vd.d);
99
+ float symmetric_dis(idx_t i, idx_t j) final {
100
+ return vd(b + j * vd.d, b + i * vd.d);
100
101
  }
101
102
 
102
- float symmetric_dis(idx_t i, idx_t j) override {
103
- return vd(b + j * vd.d, b + i * vd.d);
103
+ float distance_to_code(const uint8_t* code) final {
104
+ return vd(q, (float*)code);
104
105
  }
105
106
 
106
107
  ExtraDistanceComputer(
@@ -108,7 +109,11 @@ struct ExtraDistanceComputer : DistanceComputer {
108
109
  const float* xb,
109
110
  size_t nb,
110
111
  const float* q = nullptr)
111
- : vd(vd), nb(nb), q(q), b(xb) {}
112
+ : FlatCodesDistanceComputer((uint8_t*)xb, vd.d * sizeof(float)),
113
+ vd(vd),
114
+ nb(nb),
115
+ q(q),
116
+ b(xb) {}
112
117
 
113
118
  void set_query(const float* x) override {
114
119
  q = x;
@@ -188,7 +193,7 @@ void knn_extra_metrics(
188
193
  }
189
194
  }
190
195
 
191
- DistanceComputer* get_extra_distance_computer(
196
+ FlatCodesDistanceComputer* get_extra_distance_computer(
192
197
  size_t d,
193
198
  MetricType mt,
194
199
  float metric_arg,
@@ -18,6 +18,8 @@
18
18
 
19
19
  namespace faiss {
20
20
 
21
+ struct FlatCodesDistanceComputer;
22
+
21
23
  void pairwise_extra_distances(
22
24
  int64_t d,
23
25
  int64_t nq,
@@ -43,7 +45,7 @@ void knn_extra_metrics(
43
45
 
44
46
  /** get a DistanceComputer that refers to this type of distance and
45
47
  * indexes a flat array of size nb */
46
- DistanceComputer* get_extra_distance_computer(
48
+ FlatCodesDistanceComputer* get_extra_distance_computer(
47
49
  size_t d,
48
50
  MetricType mt,
49
51
  float metric_arg,
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+
3
+ #include <immintrin.h>
4
+ #include <cstdint>
5
+
6
+ namespace faiss {
7
+
8
+ inline uint16_t encode_fp16(float x) {
9
+ __m128 xf = _mm_set1_ps(x);
10
+ __m128i xi =
11
+ _mm_cvtps_ph(xf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
12
+ return _mm_cvtsi128_si32(xi) & 0xffff;
13
+ }
14
+
15
+ inline float decode_fp16(uint16_t x) {
16
+ __m128i xi = _mm_set1_epi16(x);
17
+ __m128 xf = _mm_cvtph_ps(xi);
18
+ return _mm_cvtss_f32(xf);
19
+ }
20
+
21
+ } // namespace faiss
@@ -0,0 +1,101 @@
1
+ #pragma once
2
+
3
#include <algorithm>
#include <cstdint>
#include <cstring>
5
+
6
namespace faiss {

// non-intrinsic FP16 <-> FP32 code adapted from
// https://github.com/ispc/ispc/blob/master/stdlib.ispc

namespace {

/// Reinterprets the 32 bits of `x` as a float.
/// memcpy is used instead of a pointer cast because reading a uint32_t
/// object through a float lvalue violates the strict-aliasing rule.
inline float floatbits(uint32_t x) {
    static_assert(
            sizeof(float) == sizeof(uint32_t),
            "float must be 32 bits wide");
    float f;
    std::memcpy(&f, &x, sizeof(f));
    return f;
}

/// Returns the 32-bit object representation of `f` as an unsigned integer
/// (memcpy-based for the same aliasing reason as floatbits()).
inline uint32_t intbits(float f) {
    static_assert(
            sizeof(float) == sizeof(uint32_t),
            "float must be 32 bits wide");
    uint32_t x;
    std::memcpy(&x, &f, sizeof(x));
    return x;
}

} // namespace

/// Converts a single-precision float to half-precision bits without
/// hardware intrinsics.
///
/// @param f  value to convert
/// @return   sign bit, 5 exponent bits and 10 mantissa bits; inputs whose
///           magnitude bits exceed the infinity pattern (NaN) yield 0x7e00
///           and infinity yields 0x7c00, each combined with the sign bit
inline uint16_t encode_fp16(float f) {
    // via Fabian "ryg" Giesen.
    // https://gist.github.com/2156668
    const uint32_t sign_mask = 0x80000000u;

    uint32_t fint = intbits(f);
    const uint32_t sign = fint & sign_mask;
    fint ^= sign; // magnitude bits only from here on

    // NOTE all the integer compares in this function can be safely
    // compiled into signed compares since all operands are below
    // 0x80000000. Important if you want fast straight SSE2 code (since
    // there's no unsigned PCMPGTD).

    // Inf or NaN (all exponent bits set)
    // NaN->qNaN and Inf->Inf
    // unconditional assignment here, will override with right value for
    // the regular case below.
    const uint32_t f32infty = 255u << 23;
    uint32_t o = (fint > f32infty) ? 0x7e00u : 0x7c00u;

    // (De)normalized number or zero
    // update fint unconditionally to save the blending; we don't need it
    // anymore for the Inf/NaN case anyway.

    const uint32_t round_mask = ~0xfffu;
    const uint32_t magic = 15u << 23;

    // Shift exponent down, denormalize if necessary.
    // NOTE This represents half-float denormals using single
    // precision denormals. The main reason to do this is that
    // there's no shift with per-lane variable shifts in SSE*, which
    // we'd otherwise need. It has some funky side effects though:
    // - This conversion will actually respect the FTZ (Flush To Zero)
    // flag in MXCSR - if it's set, no half-float denormals will be
    // generated. I'm honestly not sure whether this is good or
    // bad. It's definitely interesting.
    // - If the underlying HW doesn't support denormals (not an issue
    // with Intel CPUs, but might be a problem on GPUs or PS3 SPUs),
    // you will always get flush-to-zero behavior. This is bad,
    // unless you're on a CPU where you don't care.
    // - Denormals tend to be slow. FP32 denormals are rare in
    // practice outside of things like recursive filters in DSP -
    // not a typical half-float application. Whether FP16 denormals
    // are rare in practice, I don't know. Whatever slow path your
    // HW may or may not have for denormals, this may well hit it.
    float fscale = floatbits(fint & round_mask) * floatbits(magic);
    // Clamp so that values too large for half precision round up to Inf.
    fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u));
    // Unsigned wrap-around: subtracting ~0xfff adds the 0x1000 rounding bias.
    const uint32_t fint2 = intbits(fscale) - round_mask;

    if (fint < f32infty) {
        o = fint2 >> 13; // Take the bits!
    }

    return static_cast<uint16_t>(o | (sign >> 16));
}

/// Converts half-precision bits to a single-precision float without
/// hardware intrinsics.
///
/// @param h  half-precision bit pattern
/// @return   the float holding the same value (zero, denormals, Inf and
///           NaN patterns are handled by the branches below)
inline float decode_fp16(uint16_t h) {
    // https://gist.github.com/2144712
    // Fabian "ryg" Giesen.

    const uint32_t shifted_exp = 0x7c00u << 13; // exponent mask after shift

    // exponent/mantissa bits moved to their float positions
    uint32_t o = static_cast<uint32_t>(h & 0x7fffu) << 13;
    const uint32_t exp = shifted_exp & o; // just the exponent
    o += (127u - 15u) << 23;              // exponent adjust

    const uint32_t infnan_val = o + ((128u - 16u) << 23);
    // Zero/denormal inputs: renormalize through a float subtraction.
    const uint32_t zerodenorm_val =
            intbits(floatbits(o + (1u << 23)) - floatbits(113u << 23));
    const uint32_t reg_val = (exp == 0) ? zerodenorm_val : o;

    // Unsigned shift avoids shifting a signed value into its sign bit.
    const uint32_t sign_bit = static_cast<uint32_t>(h & 0x8000u) << 16;
    return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
}

} // namespace faiss
@@ -0,0 +1,11 @@
1
+ #pragma once
2
+
3
+ #include <cstdint>
4
+
5
+ #include <faiss/impl/platform_macros.h>
6
+
7
+ #if defined(__F16C__)
8
+ #include <faiss/utils/fp16-fp16c.h>
9
+ #else
10
+ #include <faiss/utils/fp16-inl.h>
11
+ #endif
@@ -9,6 +9,60 @@ namespace faiss {
9
9
 
10
10
  extern const uint8_t hamdis_tab_ham_bytes[256];
11
11
 
12
+ /* Elementary Hamming distance computation: unoptimized */
13
+ template <size_t nbits, typename T>
14
+ inline T hamming(const uint8_t* bs1, const uint8_t* bs2) {
15
+ const size_t nbytes = nbits / 8;
16
+ size_t i;
17
+ T h = 0;
18
+ for (i = 0; i < nbytes; i++) {
19
+ h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
20
+ }
21
+ return h;
22
+ }
23
+
24
+ /* Hamming distances for multiples of 64 bits */
25
+ template <size_t nbits>
26
+ inline hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
27
+ const size_t nwords = nbits / 64;
28
+ size_t i;
29
+ hamdis_t h = 0;
30
+ for (i = 0; i < nwords; i++) {
31
+ h += popcount64(bs1[i] ^ bs2[i]);
32
+ }
33
+ return h;
34
+ }
35
+
36
+ /* specialized (optimized) functions */
37
+ template <>
38
+ inline hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
39
+ return popcount64(pa[0] ^ pb[0]);
40
+ }
41
+
42
+ template <>
43
+ inline hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
44
+ return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
45
+ }
46
+
47
+ template <>
48
+ inline hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
49
+ return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
50
+ popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
51
+ }
52
+
53
+ /* Hamming distances for multiple of 64 bits */
54
+ inline hamdis_t hamming(
55
+ const uint64_t* bs1,
56
+ const uint64_t* bs2,
57
+ size_t nwords) {
58
+ hamdis_t h = 0;
59
+ for (size_t i = 0; i < nwords; i++) {
60
+ h += popcount64(bs1[i] ^ bs2[i]);
61
+ }
62
+ return h;
63
+ }
64
+
65
+ // BitstringWriter and BitstringReader functions
12
66
  inline BitstringWriter::BitstringWriter(uint8_t* code, size_t code_size)
13
67
  : code(code), code_size(code_size), i(0) {
14
68
  memset(code, 0, code_size);
@@ -56,54 +56,6 @@ const uint8_t hamdis_tab_ham_bytes[256] = {
56
56
  4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
57
57
  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
58
58
 
59
- /* Elementary Hamming distance computation: unoptimized */
60
- template <size_t nbits, typename T>
61
- T hamming(const uint8_t* bs1, const uint8_t* bs2) {
62
- const size_t nbytes = nbits / 8;
63
- size_t i;
64
- T h = 0;
65
- for (i = 0; i < nbytes; i++)
66
- h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
67
- return h;
68
- }
69
-
70
- /* Hamming distances for multiples of 64 bits */
71
- template <size_t nbits>
72
- hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
73
- const size_t nwords = nbits / 64;
74
- size_t i;
75
- hamdis_t h = 0;
76
- for (i = 0; i < nwords; i++)
77
- h += popcount64(bs1[i] ^ bs2[i]);
78
- return h;
79
- }
80
-
81
- /* specialized (optimized) functions */
82
- template <>
83
- hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
84
- return popcount64(pa[0] ^ pb[0]);
85
- }
86
-
87
- template <>
88
- hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
89
- return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
90
- }
91
-
92
- template <>
93
- hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
94
- return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
95
- popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
96
- }
97
-
98
- /* Hamming distances for multiple of 64 bits */
99
- hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords) {
100
- size_t i;
101
- hamdis_t h = 0;
102
- for (i = 0; i < nwords; i++)
103
- h += popcount64(bs1[i] ^ bs2[i]);
104
- return h;
105
- }
106
-
107
59
  template <size_t nbits>
108
60
  void hammings(
109
61
  const uint64_t* bs1,
@@ -46,6 +46,11 @@ struct CMin {
46
46
  inline static bool cmp(T a, T b) {
47
47
  return a < b;
48
48
  }
49
+ // Similar to cmp(), but also breaks ties
50
+ // by comparing the second pair of arguments.
51
+ inline static bool cmp2(T a1, T b1, TI a2, TI b2) {
52
+ return (a1 < b1) || ((a1 == b1) && (a2 < b2));
53
+ }
49
54
  inline static T neutral() {
50
55
  return std::numeric_limits<T>::lowest();
51
56
  }
@@ -64,6 +69,11 @@ struct CMax {
64
69
  inline static bool cmp(T a, T b) {
65
70
  return a > b;
66
71
  }
72
+ // Similar to cmp(), but also breaks ties
73
+ // by comparing the second pair of arguments.
74
+ inline static bool cmp2(T a1, T b1, TI a2, TI b2) {
75
+ return (a1 > b1) || ((a1 == b1) && (a2 > b2));
76
+ }
67
77
  inline static T neutral() {
68
78
  return std::numeric_limits<T>::max();
69
79
  }
@@ -284,6 +284,68 @@ void quantize_LUT_and_bias(
284
284
  *b_out = b;
285
285
  }
286
286
 
287
+ void aq_quantize_LUT_and_bias(
288
+ size_t nprobe,
289
+ size_t M,
290
+ size_t ksub,
291
+ const float* LUT,
292
+ const float* bias,
293
+ size_t M_norm,
294
+ int norm_scale,
295
+ uint8_t* LUTq,
296
+ size_t M2,
297
+ uint16_t* biasq,
298
+ float* a_out,
299
+ float* b_out) {
300
+ float a, b;
301
+ std::vector<float> mins(M);
302
+ float max_span_LUT = -HUGE_VAL, max_span_dis;
303
+ float bias_min = tab_min(bias, nprobe);
304
+ float bias_max = tab_max(bias, nprobe);
305
+ max_span_dis = bias_max - bias_min;
306
+ b = 0;
307
+ for (int i = 0; i < M; i++) {
308
+ mins[i] = tab_min(LUT + i * ksub, ksub);
309
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
310
+ max_span_LUT = std::max(max_span_LUT, span);
311
+ max_span_dis += (i >= M - M_norm ? span * norm_scale : span);
312
+ b += mins[i];
313
+ }
314
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
315
+ b += bias_min;
316
+
317
+ for (int i = 0; i < M; i++) {
318
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
319
+ }
320
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
321
+ round_tab(bias, nprobe, a, bias_min, biasq);
322
+
323
+ *a_out = a;
324
+ *b_out = b;
325
+ }
326
+
327
+ float aq_estimate_norm_scale(
328
+ size_t M,
329
+ size_t ksub,
330
+ size_t M_norm,
331
+ const float* LUT) {
332
+ float max_span_LUT = -HUGE_VAL;
333
+ for (int i = 0; i < M - M_norm; i++) {
334
+ float min = tab_min(LUT + i * ksub, ksub);
335
+ float span = tab_max(LUT + i * ksub, ksub) - min;
336
+ max_span_LUT = std::max(max_span_LUT, span);
337
+ }
338
+
339
+ float max_span_LUT_norm = -HUGE_VAL;
340
+ for (int i = M - M_norm; i < M; i++) {
341
+ float min = tab_min(LUT + i * ksub, ksub);
342
+ float span = tab_max(LUT + i * ksub, ksub) - min;
343
+ max_span_LUT_norm = std::max(max_span_LUT_norm, span);
344
+ }
345
+
346
+ return max_span_LUT_norm / max_span_LUT;
347
+ }
348
+
287
349
  } // namespace quantize_lut
288
350
 
289
351
  } // namespace faiss
@@ -77,6 +77,26 @@ void quantize_LUT_and_bias(
77
77
  float* a_out = nullptr,
78
78
  float* b_out = nullptr);
79
79
 
80
+ void aq_quantize_LUT_and_bias(
81
+ size_t nprobe,
82
+ size_t M,
83
+ size_t ksub,
84
+ const float* LUT,
85
+ const float* bias,
86
+ size_t M_norm,
87
+ int norm_scale,
88
+ uint8_t* LUTq,
89
+ size_t M2,
90
+ uint16_t* biasq,
91
+ float* a_out,
92
+ float* b_out);
93
+
94
+ float aq_estimate_norm_scale(
95
+ size_t M,
96
+ size_t ksub,
97
+ size_t M_norm,
98
+ const float* LUT);
99
+
80
100
  } // namespace quantize_lut
81
101
 
82
102
  } // namespace faiss
@@ -9,6 +9,23 @@
9
9
 
10
10
  #include <faiss/utils/random.h>
11
11
 
12
+ extern "C" {
13
+ int sgemm_(
14
+ const char* transa,
15
+ const char* transb,
16
+ FINTEGER* m,
17
+ FINTEGER* n,
18
+ FINTEGER* k,
19
+ const float* alpha,
20
+ const float* a,
21
+ FINTEGER* lda,
22
+ const float* b,
23
+ FINTEGER* ldb,
24
+ float* beta,
25
+ float* c,
26
+ FINTEGER* ldc);
27
+ }
28
+
12
29
  namespace faiss {
13
30
 
14
31
  /**************************************************
@@ -165,4 +182,40 @@ void byte_rand(uint8_t* x, size_t n, int64_t seed) {
165
182
  }
166
183
  }
167
184
 
185
+ void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed) {
186
+ size_t d1 = 10;
187
+ std::vector<float> x1(n * d1);
188
+ float_randn(x1.data(), x1.size(), seed);
189
+ std::vector<float> rot(d1 * d);
190
+ float_rand(rot.data(), rot.size(), seed + 1);
191
+
192
+ { //
193
+ FINTEGER di = d, d1i = d1, ni = n;
194
+ float one = 1.0, zero = 0.0;
195
+ sgemm_("Not transposed",
196
+ "Not transposed", // natural order
197
+ &di,
198
+ &ni,
199
+ &d1i,
200
+ &one,
201
+ rot.data(),
202
+ &di, // rotation matrix
203
+ x1.data(),
204
+ &d1i, // second term
205
+ &zero,
206
+ x,
207
+ &di);
208
+ }
209
+
210
+ std::vector<float> scales(d);
211
+ float_rand(scales.data(), d, seed + 2);
212
+
213
+ #pragma omp parallel for if (n * d > 10000)
214
+ for (int64_t i = 0; i < n; i++) {
215
+ for (size_t j = 0; j < d; j++) {
216
+ x[i * d + j] = sinf(x[i * d + j] * (scales[j] * 4 + 0.1));
217
+ }
218
+ }
219
+ }
220
+
168
221
  } // namespace faiss
@@ -54,4 +54,9 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);
54
54
  /* random permutation */
55
55
  void rand_perm(int* perm, size_t n, int64_t seed);
56
56
 
57
+ /* Random set of vectors with intrinsic dimensionality 10 that is harder to
58
+ * index than a subspace of dim 10 but easier than uniform data in dimension d
59
+ * */
60
+ void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed);
61
+
57
62
  } // namespace faiss
@@ -111,6 +111,10 @@ struct simd16uint16 : simd256bit {
111
111
  i = _mm256_set1_epi16((short)x);
112
112
  }
113
113
 
114
+ simd16uint16 operator*(const simd16uint16& other) const {
115
+ return simd16uint16(_mm256_mullo_epi16(i, other.i));
116
+ }
117
+
114
118
  // shift must be known at compile time
115
119
  simd16uint16 operator>>(const int shift) const {
116
120
  return simd16uint16(_mm256_srli_epi16(i, shift));
@@ -120,6 +120,11 @@ struct simd16uint16 : simd256bit {
120
120
  }
121
121
  }
122
122
 
123
+ simd16uint16 operator*(const simd16uint16& other) const {
124
+ return binary_func(
125
+ *this, other, [](uint16_t a, uint16_t b) { return a * b; });
126
+ }
127
+
123
128
  // shift must be known at compile time
124
129
  simd16uint16 operator>>(const int shift) const {
125
130
  return unary_func(*this, [shift](uint16_t a) { return a >> shift; });
@@ -433,7 +438,7 @@ struct simd8uint32 : simd256bit {
433
438
 
434
439
  explicit simd8uint32(const simd256bit& x) : simd256bit(x) {}
435
440
 
436
- explicit simd8uint32(const uint8_t* x) : simd256bit((const void*)x) {}
441
+ explicit simd8uint32(const uint32_t* x) : simd256bit((const void*)x) {}
437
442
 
438
443
  std::string elements_to_string(const char* fmt) const {
439
444
  char res[1000], *ptr = res;
@@ -260,6 +260,11 @@ struct simd16uint16 {
260
260
  detail::simdlib::set1(data, &vdupq_n_u16, x);
261
261
  }
262
262
 
263
+ simd16uint16 operator*(const simd16uint16& other) const {
264
+ return simd16uint16{
265
+ detail::simdlib::binary_func(data, other.data, &vmulq_u16)};
266
+ }
267
+
263
268
  // shift must be known at compile time
264
269
  simd16uint16 operator>>(const int shift) const {
265
270
  switch (shift) {
@@ -641,8 +646,8 @@ inline simd32uint8 blendv(
641
646
  const uint8x16x2_t msb_mask = {
642
647
  vtstq_u8(mask.data.val[0], msb), vtstq_u8(mask.data.val[1], msb)};
643
648
  const uint8x16x2_t selected = {
644
- vbslq_u8(msb_mask.val[0], a.data.val[0], b.data.val[0]),
645
- vbslq_u8(msb_mask.val[1], a.data.val[1], b.data.val[1])};
649
+ vbslq_u8(msb_mask.val[0], b.data.val[0], a.data.val[0]),
650
+ vbslq_u8(msb_mask.val[1], b.data.val[1], a.data.val[1])};
646
651
  return simd32uint8{selected};
647
652
  }
648
653