faiss 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/README.md +23 -21
  4. data/ext/faiss/extconf.rb +11 -0
  5. data/ext/faiss/index.cpp +4 -4
  6. data/ext/faiss/index_binary.cpp +6 -6
  7. data/ext/faiss/product_quantizer.cpp +4 -4
  8. data/lib/faiss/version.rb +1 -1
  9. data/vendor/faiss/faiss/AutoTune.cpp +13 -0
  10. data/vendor/faiss/faiss/IVFlib.cpp +101 -2
  11. data/vendor/faiss/faiss/IVFlib.h +26 -2
  12. data/vendor/faiss/faiss/Index.cpp +36 -3
  13. data/vendor/faiss/faiss/Index.h +43 -6
  14. data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
  15. data/vendor/faiss/faiss/Index2Layer.h +6 -1
  16. data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
  17. data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
  18. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
  19. data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
  20. data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
  21. data/vendor/faiss/faiss/IndexBinary.h +18 -3
  22. data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
  23. data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
  24. data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
  25. data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
  26. data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
  27. data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
  28. data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
  29. data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
  30. data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
  31. data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
  32. data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
  33. data/vendor/faiss/faiss/IndexFastScan.h +145 -0
  34. data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
  35. data/vendor/faiss/faiss/IndexFlat.h +7 -4
  36. data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
  37. data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
  38. data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
  39. data/vendor/faiss/faiss/IndexHNSW.h +4 -2
  40. data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
  41. data/vendor/faiss/faiss/IndexIDMap.h +107 -0
  42. data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
  43. data/vendor/faiss/faiss/IndexIVF.h +35 -16
  44. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
  45. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
  46. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
  47. data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
  48. data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
  49. data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
  50. data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
  51. data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
  52. data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
  53. data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
  54. data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
  55. data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
  56. data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
  57. data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
  58. data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
  59. data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
  60. data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
  61. data/vendor/faiss/faiss/IndexLSH.h +2 -1
  62. data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
  63. data/vendor/faiss/faiss/IndexLattice.h +3 -1
  64. data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
  65. data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
  66. data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
  67. data/vendor/faiss/faiss/IndexNSG.h +25 -1
  68. data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
  69. data/vendor/faiss/faiss/IndexPQ.h +19 -5
  70. data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
  71. data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
  72. data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
  73. data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
  74. data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
  75. data/vendor/faiss/faiss/IndexRefine.h +4 -2
  76. data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
  77. data/vendor/faiss/faiss/IndexReplicas.h +2 -1
  78. data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
  79. data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
  80. data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
  81. data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
  82. data/vendor/faiss/faiss/IndexShards.cpp +4 -1
  83. data/vendor/faiss/faiss/IndexShards.h +2 -1
  84. data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
  85. data/vendor/faiss/faiss/MetaIndexes.h +3 -81
  86. data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
  87. data/vendor/faiss/faiss/VectorTransform.h +22 -4
  88. data/vendor/faiss/faiss/clone_index.cpp +23 -1
  89. data/vendor/faiss/faiss/clone_index.h +3 -0
  90. data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
  91. data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
  92. data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
  93. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
  94. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
  95. data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
  96. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
  97. data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
  98. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
  99. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
  100. data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
  101. data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
  102. data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
  103. data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
  104. data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
  105. data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
  106. data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
  107. data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
  108. data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
  109. data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
  110. data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
  111. data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
  112. data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
  113. data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
  114. data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
  115. data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
  116. data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
  117. data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
  118. data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
  119. data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
  120. data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
  121. data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
  122. data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
  123. data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
  124. data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
  125. data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
  126. data/vendor/faiss/faiss/impl/HNSW.h +19 -16
  127. data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
  128. data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
  129. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
  130. data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
  131. data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
  132. data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
  133. data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
  134. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
  135. data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
  136. data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
  137. data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
  138. data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
  139. data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
  140. data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
  141. data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
  142. data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
  143. data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
  144. data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
  145. data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
  146. data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
  147. data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
  148. data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
  149. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
  150. data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
  151. data/vendor/faiss/faiss/index_factory.cpp +196 -7
  152. data/vendor/faiss/faiss/index_io.h +5 -0
  153. data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
  154. data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
  155. data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
  156. data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
  157. data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
  158. data/vendor/faiss/faiss/utils/Heap.h +31 -15
  159. data/vendor/faiss/faiss/utils/distances.cpp +380 -56
  160. data/vendor/faiss/faiss/utils/distances.h +113 -15
  161. data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
  162. data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
  163. data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
  164. data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
  165. data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
  166. data/vendor/faiss/faiss/utils/fp16.h +11 -0
  167. data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
  168. data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
  169. data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
  170. data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
  171. data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
  172. data/vendor/faiss/faiss/utils/random.cpp +53 -0
  173. data/vendor/faiss/faiss/utils/random.h +5 -0
  174. data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
  175. data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
  176. data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
  177. metadata +37 -3
@@ -14,6 +14,7 @@
14
14
  #include <cmath>
15
15
 
16
16
  #include <faiss/impl/AuxIndexStructures.h>
17
+ #include <faiss/impl/DistanceComputer.h>
17
18
  #include <faiss/impl/FaissAssert.h>
18
19
  #include <faiss/utils/utils.h>
19
20
 
@@ -89,18 +90,18 @@ void knn_extra_metrics_template(
89
90
  }
90
91
 
91
92
  template <class VD>
92
- struct ExtraDistanceComputer : DistanceComputer {
93
+ struct ExtraDistanceComputer : FlatCodesDistanceComputer {
93
94
  VD vd;
94
95
  Index::idx_t nb;
95
96
  const float* q;
96
97
  const float* b;
97
98
 
98
- float operator()(idx_t i) override {
99
- return vd(q, b + i * vd.d);
99
+ float symmetric_dis(idx_t i, idx_t j) final {
100
+ return vd(b + j * vd.d, b + i * vd.d);
100
101
  }
101
102
 
102
- float symmetric_dis(idx_t i, idx_t j) override {
103
- return vd(b + j * vd.d, b + i * vd.d);
103
+ float distance_to_code(const uint8_t* code) final {
104
+ return vd(q, (float*)code);
104
105
  }
105
106
 
106
107
  ExtraDistanceComputer(
@@ -108,7 +109,11 @@ struct ExtraDistanceComputer : DistanceComputer {
108
109
  const float* xb,
109
110
  size_t nb,
110
111
  const float* q = nullptr)
111
- : vd(vd), nb(nb), q(q), b(xb) {}
112
+ : FlatCodesDistanceComputer((uint8_t*)xb, vd.d * sizeof(float)),
113
+ vd(vd),
114
+ nb(nb),
115
+ q(q),
116
+ b(xb) {}
112
117
 
113
118
  void set_query(const float* x) override {
114
119
  q = x;
@@ -188,7 +193,7 @@ void knn_extra_metrics(
188
193
  }
189
194
  }
190
195
 
191
- DistanceComputer* get_extra_distance_computer(
196
+ FlatCodesDistanceComputer* get_extra_distance_computer(
192
197
  size_t d,
193
198
  MetricType mt,
194
199
  float metric_arg,
@@ -18,6 +18,8 @@
18
18
 
19
19
  namespace faiss {
20
20
 
21
+ struct FlatCodesDistanceComputer;
22
+
21
23
  void pairwise_extra_distances(
22
24
  int64_t d,
23
25
  int64_t nq,
@@ -43,7 +45,7 @@ void knn_extra_metrics(
43
45
 
44
46
  /** get a DistanceComputer that refers to this type of distance and
45
47
  * indexes a flat array of size nb */
46
- DistanceComputer* get_extra_distance_computer(
48
+ FlatCodesDistanceComputer* get_extra_distance_computer(
47
49
  size_t d,
48
50
  MetricType mt,
49
51
  float metric_arg,
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+
3
+ #include <immintrin.h>
4
+ #include <cstdint>
5
+
6
+ namespace faiss {
7
+
8
+ inline uint16_t encode_fp16(float x) {
9
+ __m128 xf = _mm_set1_ps(x);
10
+ __m128i xi =
11
+ _mm_cvtps_ph(xf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
12
+ return _mm_cvtsi128_si32(xi) & 0xffff;
13
+ }
14
+
15
+ inline float decode_fp16(uint16_t x) {
16
+ __m128i xi = _mm_set1_epi16(x);
17
+ __m128 xf = _mm_cvtph_ps(xi);
18
+ return _mm_cvtss_f32(xf);
19
+ }
20
+
21
+ } // namespace faiss
@@ -0,0 +1,101 @@
1
+ #pragma once
2
+
3
+ #include <algorithm>
4
+ #include <cstdint>
+ #include <cstring>
5
+
6
+ namespace faiss {
7
+
8
+ // non-intrinsic FP16 <-> FP32 code adapted from
9
+ // https://github.com/ispc/ispc/blob/master/stdlib.ispc
10
+
11
namespace {

/// Reinterpret the 32 bits of `x` as a float (no numeric conversion).
inline float floatbits(uint32_t x) {
    static_assert(
            sizeof(float) == sizeof(uint32_t),
            "floatbits() requires a 32-bit float");
    // memcpy is the well-defined way to type-pun; dereferencing a cast
    // pointer breaks the strict-aliasing rule.
    float f;
    std::memcpy(&f, &x, sizeof(f));
    return f;
}

/// Return the raw 32-bit pattern of `f` (no numeric conversion).
inline uint32_t intbits(float f) {
    static_assert(
            sizeof(float) == sizeof(uint32_t),
            "intbits() requires a 32-bit float");
    uint32_t x;
    std::memcpy(&x, &f, sizeof(x));
    return x;
}

} // namespace

/// Software float -> half conversion.
/// @param f  value to convert
/// @return   16-bit half-precision pattern; values too large for half
///           saturate to the largest finite half, Inf maps to Inf and NaN
///           maps to a quiet NaN
inline uint16_t encode_fp16(float f) {
    // via Fabian "ryg" Giesen.
    // https://gist.github.com/2156668
    const uint32_t sign_mask = 0x80000000u;

    uint32_t fint = intbits(f);
    const uint32_t sign = fint & sign_mask;
    fint ^= sign; // work on the magnitude only

    // NOTE all the integer compares in this function can be safely
    // compiled into signed compares since all operands are below
    // 0x80000000. Important if you want fast straight SSE2 code (since
    // there's no unsigned PCMPGTD).

    // Inf or NaN (all exponent bits set)
    // NaN->qNaN and Inf->Inf
    // unconditional assignment here, will override with right value for
    // the regular case below.
    const uint32_t f32infty = 255u << 23;
    uint32_t o = (fint > f32infty) ? 0x7e00u : 0x7c00u;

    // (De)normalized number or zero
    // update fint unconditionally to save the blending; we don't need it
    // anymore for the Inf/NaN case anyway.

    const uint32_t round_mask = ~0xfffu;
    const uint32_t magic = 15u << 23;

    // Shift exponent down, denormalize if necessary.
    // NOTE This represents half-float denormals using single
    // precision denormals.  The main reason to do this is that
    // there's no shift with per-lane variable shifts in SSE*, which
    // we'd otherwise need. It has some funky side effects though:
    // - This conversion will actually respect the FTZ (Flush To Zero)
    //   flag in MXCSR - if it's set, no half-float denormals will be
    //   generated. I'm honestly not sure whether this is good or
    //   bad. It's definitely interesting.
    // - If the underlying HW doesn't support denormals (not an issue
    //   with Intel CPUs, but might be a problem on GPUs or PS3 SPUs),
    //   you will always get flush-to-zero behavior. This is bad,
    //   unless you're on a CPU where you don't care.
    // - Denormals tend to be slow. FP32 denormals are rare in
    //   practice outside of things like recursive filters in DSP -
    //   not a typical half-float application. Whether FP16 denormals
    //   are rare in practice, I don't know. Whatever slow path your
    //   HW may or may not have for denormals, this may well hit it.
    float fscale = floatbits(fint & round_mask) * floatbits(magic);
    fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u));
    // Subtracting round_mask (== -0x1000 mod 2^32) adds the rounding bias.
    const uint32_t fint2 = intbits(fscale) - round_mask;

    if (fint < f32infty) {
        o = fint2 >> 13; // Take the bits!
    }

    return static_cast<uint16_t>(o | (sign >> 16));
}

/// Software half -> float conversion.
/// @param h  16-bit half-precision pattern
/// @return   the float with the same value (zero, denormals, Inf and NaN
///           are handled explicitly)
inline float decode_fp16(uint16_t h) {
    // https://gist.github.com/2144712
    // Fabian "ryg" Giesen.

    // All bit manipulation is done in uint32_t so that no shift ever moves
    // a bit into the sign position of a signed integer.
    const uint32_t shifted_exp = 0x7c00u << 13; // exponent mask after shift

    uint32_t o = static_cast<uint32_t>(h & 0x7fffu) << 13; // exponent/mantissa
    const uint32_t exp = shifted_exp & o;                   // just the exponent
    o += static_cast<uint32_t>(127 - 15) << 23;             // exponent adjust

    uint32_t magnitude;
    if (exp == shifted_exp) {
        // Inf/NaN: push the exponent up to all ones.
        magnitude = o + (static_cast<uint32_t>(128 - 16) << 23);
    } else if (exp == 0) {
        // Zero/denormal: renormalize with one float subtraction.
        magnitude = intbits(floatbits(o + (1u << 23)) - floatbits(113u << 23));
    } else {
        magnitude = o;
    }

    const uint32_t sign_bit = static_cast<uint32_t>(h & 0x8000u) << 16;
    return floatbits(magnitude | sign_bit);
}
100
+
101
+ } // namespace faiss
@@ -0,0 +1,11 @@
1
+ #pragma once
2
+
3
+ #include <cstdint>
4
+
5
+ #include <faiss/impl/platform_macros.h>
6
+
7
+ #if defined(__F16C__)
8
+ #include <faiss/utils/fp16-fp16c.h>
9
+ #else
10
+ #include <faiss/utils/fp16-inl.h>
11
+ #endif
@@ -9,6 +9,60 @@ namespace faiss {
9
9
 
10
10
  extern const uint8_t hamdis_tab_ham_bytes[256];
11
11
 
12
+ /* Elementary Hamming distance computation: unoptimized */
13
+ template <size_t nbits, typename T>
14
+ inline T hamming(const uint8_t* bs1, const uint8_t* bs2) {
15
+ const size_t nbytes = nbits / 8;
16
+ size_t i;
17
+ T h = 0;
18
+ for (i = 0; i < nbytes; i++) {
19
+ h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
20
+ }
21
+ return h;
22
+ }
23
+
24
+ /* Hamming distances for multiples of 64 bits */
25
+ template <size_t nbits>
26
+ inline hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
27
+ const size_t nwords = nbits / 64;
28
+ size_t i;
29
+ hamdis_t h = 0;
30
+ for (i = 0; i < nwords; i++) {
31
+ h += popcount64(bs1[i] ^ bs2[i]);
32
+ }
33
+ return h;
34
+ }
35
+
36
+ /* specialized (optimized) functions */
37
+ template <>
38
+ inline hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
39
+ return popcount64(pa[0] ^ pb[0]);
40
+ }
41
+
42
+ template <>
43
+ inline hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
44
+ return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
45
+ }
46
+
47
+ template <>
48
+ inline hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
49
+ return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
50
+ popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
51
+ }
52
+
53
+ /* Hamming distances for multiple of 64 bits */
54
+ inline hamdis_t hamming(
55
+ const uint64_t* bs1,
56
+ const uint64_t* bs2,
57
+ size_t nwords) {
58
+ hamdis_t h = 0;
59
+ for (size_t i = 0; i < nwords; i++) {
60
+ h += popcount64(bs1[i] ^ bs2[i]);
61
+ }
62
+ return h;
63
+ }
64
+
65
+ // BitstringWriter and BitstringReader functions
12
66
  inline BitstringWriter::BitstringWriter(uint8_t* code, size_t code_size)
13
67
  : code(code), code_size(code_size), i(0) {
14
68
  memset(code, 0, code_size);
@@ -56,54 +56,6 @@ const uint8_t hamdis_tab_ham_bytes[256] = {
56
56
  4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
57
57
  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
58
58
 
59
- /* Elementary Hamming distance computation: unoptimized */
60
- template <size_t nbits, typename T>
61
- T hamming(const uint8_t* bs1, const uint8_t* bs2) {
62
- const size_t nbytes = nbits / 8;
63
- size_t i;
64
- T h = 0;
65
- for (i = 0; i < nbytes; i++)
66
- h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
67
- return h;
68
- }
69
-
70
- /* Hamming distances for multiples of 64 bits */
71
- template <size_t nbits>
72
- hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
73
- const size_t nwords = nbits / 64;
74
- size_t i;
75
- hamdis_t h = 0;
76
- for (i = 0; i < nwords; i++)
77
- h += popcount64(bs1[i] ^ bs2[i]);
78
- return h;
79
- }
80
-
81
- /* specialized (optimized) functions */
82
- template <>
83
- hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
84
- return popcount64(pa[0] ^ pb[0]);
85
- }
86
-
87
- template <>
88
- hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
89
- return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
90
- }
91
-
92
- template <>
93
- hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
94
- return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
95
- popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
96
- }
97
-
98
- /* Hamming distances for multiple of 64 bits */
99
- hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords) {
100
- size_t i;
101
- hamdis_t h = 0;
102
- for (i = 0; i < nwords; i++)
103
- h += popcount64(bs1[i] ^ bs2[i]);
104
- return h;
105
- }
106
-
107
59
  template <size_t nbits>
108
60
  void hammings(
109
61
  const uint64_t* bs1,
@@ -46,6 +46,11 @@ struct CMin {
46
46
  inline static bool cmp(T a, T b) {
47
47
  return a < b;
48
48
  }
49
+ // Similar to cmp(), but also breaks ties
50
+ // by comparing the second pair of arguments.
51
+ inline static bool cmp2(T a1, T b1, TI a2, TI b2) {
52
+ return (a1 < b1) || ((a1 == b1) && (a2 < b2));
53
+ }
49
54
  inline static T neutral() {
50
55
  return std::numeric_limits<T>::lowest();
51
56
  }
@@ -64,6 +69,11 @@ struct CMax {
64
69
  inline static bool cmp(T a, T b) {
65
70
  return a > b;
66
71
  }
72
+ // Similar to cmp(), but also breaks ties
73
+ // by comparing the second pair of arguments.
74
+ inline static bool cmp2(T a1, T b1, TI a2, TI b2) {
75
+ return (a1 > b1) || ((a1 == b1) && (a2 > b2));
76
+ }
67
77
  inline static T neutral() {
68
78
  return std::numeric_limits<T>::max();
69
79
  }
@@ -284,6 +284,68 @@ void quantize_LUT_and_bias(
284
284
  *b_out = b;
285
285
  }
286
286
 
287
+ void aq_quantize_LUT_and_bias(
288
+ size_t nprobe,
289
+ size_t M,
290
+ size_t ksub,
291
+ const float* LUT,
292
+ const float* bias,
293
+ size_t M_norm,
294
+ int norm_scale,
295
+ uint8_t* LUTq,
296
+ size_t M2,
297
+ uint16_t* biasq,
298
+ float* a_out,
299
+ float* b_out) {
300
+ float a, b;
301
+ std::vector<float> mins(M);
302
+ float max_span_LUT = -HUGE_VAL, max_span_dis;
303
+ float bias_min = tab_min(bias, nprobe);
304
+ float bias_max = tab_max(bias, nprobe);
305
+ max_span_dis = bias_max - bias_min;
306
+ b = 0;
307
+ for (int i = 0; i < M; i++) {
308
+ mins[i] = tab_min(LUT + i * ksub, ksub);
309
+ float span = tab_max(LUT + i * ksub, ksub) - mins[i];
310
+ max_span_LUT = std::max(max_span_LUT, span);
311
+ max_span_dis += (i >= M - M_norm ? span * norm_scale : span);
312
+ b += mins[i];
313
+ }
314
+ a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
315
+ b += bias_min;
316
+
317
+ for (int i = 0; i < M; i++) {
318
+ round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
319
+ }
320
+ memset(LUTq + M * ksub, 0, ksub * (M2 - M));
321
+ round_tab(bias, nprobe, a, bias_min, biasq);
322
+
323
+ *a_out = a;
324
+ *b_out = b;
325
+ }
326
+
327
+ float aq_estimate_norm_scale(
328
+ size_t M,
329
+ size_t ksub,
330
+ size_t M_norm,
331
+ const float* LUT) {
332
+ float max_span_LUT = -HUGE_VAL;
333
+ for (int i = 0; i < M - M_norm; i++) {
334
+ float min = tab_min(LUT + i * ksub, ksub);
335
+ float span = tab_max(LUT + i * ksub, ksub) - min;
336
+ max_span_LUT = std::max(max_span_LUT, span);
337
+ }
338
+
339
+ float max_span_LUT_norm = -HUGE_VAL;
340
+ for (int i = M - M_norm; i < M; i++) {
341
+ float min = tab_min(LUT + i * ksub, ksub);
342
+ float span = tab_max(LUT + i * ksub, ksub) - min;
343
+ max_span_LUT_norm = std::max(max_span_LUT_norm, span);
344
+ }
345
+
346
+ return max_span_LUT_norm / max_span_LUT;
347
+ }
348
+
287
349
  } // namespace quantize_lut
288
350
 
289
351
  } // namespace faiss
@@ -77,6 +77,26 @@ void quantize_LUT_and_bias(
77
77
  float* a_out = nullptr,
78
78
  float* b_out = nullptr);
79
79
 
80
+ void aq_quantize_LUT_and_bias(
81
+ size_t nprobe,
82
+ size_t M,
83
+ size_t ksub,
84
+ const float* LUT,
85
+ const float* bias,
86
+ size_t M_norm,
87
+ int norm_scale,
88
+ uint8_t* LUTq,
89
+ size_t M2,
90
+ uint16_t* biasq,
91
+ float* a_out,
92
+ float* b_out);
93
+
94
+ float aq_estimate_norm_scale(
95
+ size_t M,
96
+ size_t ksub,
97
+ size_t M_norm,
98
+ const float* LUT);
99
+
80
100
  } // namespace quantize_lut
81
101
 
82
102
  } // namespace faiss
@@ -9,6 +9,23 @@
9
9
 
10
10
  #include <faiss/utils/random.h>
11
11
 
12
+ extern "C" {
13
+ int sgemm_(
14
+ const char* transa,
15
+ const char* transb,
16
+ FINTEGER* m,
17
+ FINTEGER* n,
18
+ FINTEGER* k,
19
+ const float* alpha,
20
+ const float* a,
21
+ FINTEGER* lda,
22
+ const float* b,
23
+ FINTEGER* ldb,
24
+ float* beta,
25
+ float* c,
26
+ FINTEGER* ldc);
27
+ }
28
+
12
29
  namespace faiss {
13
30
 
14
31
  /**************************************************
@@ -165,4 +182,40 @@ void byte_rand(uint8_t* x, size_t n, int64_t seed) {
165
182
  }
166
183
  }
167
184
 
185
+ void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed) {
186
+ size_t d1 = 10;
187
+ std::vector<float> x1(n * d1);
188
+ float_randn(x1.data(), x1.size(), seed);
189
+ std::vector<float> rot(d1 * d);
190
+ float_rand(rot.data(), rot.size(), seed + 1);
191
+
192
+ { //
193
+ FINTEGER di = d, d1i = d1, ni = n;
194
+ float one = 1.0, zero = 0.0;
195
+ sgemm_("Not transposed",
196
+ "Not transposed", // natural order
197
+ &di,
198
+ &ni,
199
+ &d1i,
200
+ &one,
201
+ rot.data(),
202
+ &di, // rotation matrix
203
+ x1.data(),
204
+ &d1i, // second term
205
+ &zero,
206
+ x,
207
+ &di);
208
+ }
209
+
210
+ std::vector<float> scales(d);
211
+ float_rand(scales.data(), d, seed + 2);
212
+
213
+ #pragma omp parallel for if (n * d > 10000)
214
+ for (int64_t i = 0; i < n; i++) {
215
+ for (size_t j = 0; j < d; j++) {
216
+ x[i * d + j] = sinf(x[i * d + j] * (scales[j] * 4 + 0.1));
217
+ }
218
+ }
219
+ }
220
+
168
221
  } // namespace faiss
@@ -54,4 +54,9 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);
54
54
  /* random permutation */
55
55
  void rand_perm(int* perm, size_t n, int64_t seed);
56
56
 
57
+ /* Random set of vectors with intrinsic dimensionality 10 that is harder to
58
+ * index than a subspace of dim 10 but easier than uniform data in dimension d
59
+ * */
60
+ void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed);
61
+
57
62
  } // namespace faiss
@@ -111,6 +111,10 @@ struct simd16uint16 : simd256bit {
111
111
  i = _mm256_set1_epi16((short)x);
112
112
  }
113
113
 
114
+ simd16uint16 operator*(const simd16uint16& other) const {
115
+ return simd16uint16(_mm256_mullo_epi16(i, other.i));
116
+ }
117
+
114
118
  // shift must be known at compile time
115
119
  simd16uint16 operator>>(const int shift) const {
116
120
  return simd16uint16(_mm256_srli_epi16(i, shift));
@@ -120,6 +120,11 @@ struct simd16uint16 : simd256bit {
120
120
  }
121
121
  }
122
122
 
123
+ simd16uint16 operator*(const simd16uint16& other) const {
124
+ return binary_func(
125
+ *this, other, [](uint16_t a, uint16_t b) { return a * b; });
126
+ }
127
+
123
128
  // shift must be known at compile time
124
129
  simd16uint16 operator>>(const int shift) const {
125
130
  return unary_func(*this, [shift](uint16_t a) { return a >> shift; });
@@ -433,7 +438,7 @@ struct simd8uint32 : simd256bit {
433
438
 
434
439
  explicit simd8uint32(const simd256bit& x) : simd256bit(x) {}
435
440
 
436
- explicit simd8uint32(const uint8_t* x) : simd256bit((const void*)x) {}
441
+ explicit simd8uint32(const uint32_t* x) : simd256bit((const void*)x) {}
437
442
 
438
443
  std::string elements_to_string(const char* fmt) const {
439
444
  char res[1000], *ptr = res;
@@ -260,6 +260,11 @@ struct simd16uint16 {
260
260
  detail::simdlib::set1(data, &vdupq_n_u16, x);
261
261
  }
262
262
 
263
+ simd16uint16 operator*(const simd16uint16& other) const {
264
+ return simd16uint16{
265
+ detail::simdlib::binary_func(data, other.data, &vmulq_u16)};
266
+ }
267
+
263
268
  // shift must be known at compile time
264
269
  simd16uint16 operator>>(const int shift) const {
265
270
  switch (shift) {
@@ -641,8 +646,8 @@ inline simd32uint8 blendv(
641
646
  const uint8x16x2_t msb_mask = {
642
647
  vtstq_u8(mask.data.val[0], msb), vtstq_u8(mask.data.val[1], msb)};
643
648
  const uint8x16x2_t selected = {
644
- vbslq_u8(msb_mask.val[0], a.data.val[0], b.data.val[0]),
645
- vbslq_u8(msb_mask.val[1], a.data.val[1], b.data.val[1])};
649
+ vbslq_u8(msb_mask.val[0], b.data.val[0], a.data.val[0]),
650
+ vbslq_u8(msb_mask.val[1], b.data.val[1], a.data.val[1])};
646
651
  return simd32uint8{selected};
647
652
  }
648
653