faiss 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +23 -21
- data/ext/faiss/extconf.rb +11 -0
- data/ext/faiss/index.cpp +4 -4
- data/ext/faiss/index_binary.cpp +6 -6
- data/ext/faiss/product_quantizer.cpp +4 -4
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +13 -0
- data/vendor/faiss/faiss/IVFlib.cpp +101 -2
- data/vendor/faiss/faiss/IVFlib.h +26 -2
- data/vendor/faiss/faiss/Index.cpp +36 -3
- data/vendor/faiss/faiss/Index.h +43 -6
- data/vendor/faiss/faiss/Index2Layer.cpp +6 -2
- data/vendor/faiss/faiss/Index2Layer.h +6 -1
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +219 -16
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +63 -5
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +299 -0
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +199 -0
- data/vendor/faiss/faiss/IndexBinary.cpp +20 -4
- data/vendor/faiss/faiss/IndexBinary.h +18 -3
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +9 -2
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -2
- data/vendor/faiss/faiss/IndexBinaryFromFloat.cpp +4 -1
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +5 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +2 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +17 -4
- data/vendor/faiss/faiss/IndexBinaryHash.h +8 -4
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +28 -13
- data/vendor/faiss/faiss/IndexBinaryIVF.h +10 -7
- data/vendor/faiss/faiss/IndexFastScan.cpp +626 -0
- data/vendor/faiss/faiss/IndexFastScan.h +145 -0
- data/vendor/faiss/faiss/IndexFlat.cpp +34 -21
- data/vendor/faiss/faiss/IndexFlat.h +7 -4
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +35 -1
- data/vendor/faiss/faiss/IndexFlatCodes.h +12 -0
- data/vendor/faiss/faiss/IndexHNSW.cpp +66 -138
- data/vendor/faiss/faiss/IndexHNSW.h +4 -2
- data/vendor/faiss/faiss/IndexIDMap.cpp +247 -0
- data/vendor/faiss/faiss/IndexIDMap.h +107 -0
- data/vendor/faiss/faiss/IndexIVF.cpp +121 -33
- data/vendor/faiss/faiss/IndexIVF.h +35 -16
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +84 -7
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +63 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +590 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +171 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +1290 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.h +213 -0
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +37 -17
- data/vendor/faiss/faiss/IndexIVFFlat.h +4 -2
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +234 -50
- data/vendor/faiss/faiss/IndexIVFPQ.h +5 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +23 -852
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -112
- data/vendor/faiss/faiss/IndexIVFPQR.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVFPQR.h +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +2 -1
- data/vendor/faiss/faiss/IndexLSH.cpp +4 -2
- data/vendor/faiss/faiss/IndexLSH.h +2 -1
- data/vendor/faiss/faiss/IndexLattice.cpp +7 -1
- data/vendor/faiss/faiss/IndexLattice.h +3 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +4 -3
- data/vendor/faiss/faiss/IndexNNDescent.h +2 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +37 -3
- data/vendor/faiss/faiss/IndexNSG.h +25 -1
- data/vendor/faiss/faiss/IndexPQ.cpp +106 -69
- data/vendor/faiss/faiss/IndexPQ.h +19 -5
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +15 -450
- data/vendor/faiss/faiss/IndexPQFastScan.h +15 -78
- data/vendor/faiss/faiss/IndexPreTransform.cpp +47 -8
- data/vendor/faiss/faiss/IndexPreTransform.h +15 -3
- data/vendor/faiss/faiss/IndexRefine.cpp +8 -4
- data/vendor/faiss/faiss/IndexRefine.h +4 -2
- data/vendor/faiss/faiss/IndexReplicas.cpp +4 -2
- data/vendor/faiss/faiss/IndexReplicas.h +2 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +438 -0
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +92 -0
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +26 -15
- data/vendor/faiss/faiss/IndexScalarQuantizer.h +6 -7
- data/vendor/faiss/faiss/IndexShards.cpp +4 -1
- data/vendor/faiss/faiss/IndexShards.h +2 -1
- data/vendor/faiss/faiss/MetaIndexes.cpp +5 -178
- data/vendor/faiss/faiss/MetaIndexes.h +3 -81
- data/vendor/faiss/faiss/VectorTransform.cpp +43 -0
- data/vendor/faiss/faiss/VectorTransform.h +22 -4
- data/vendor/faiss/faiss/clone_index.cpp +23 -1
- data/vendor/faiss/faiss/clone_index.h +3 -0
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +300 -0
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +195 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +2058 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +408 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +2147 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +460 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +465 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +1618 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +251 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +1452 -0
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +1 -0
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +0 -4
- data/vendor/faiss/faiss/gpu/GpuIndex.h +28 -4
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +2 -1
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +10 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +75 -14
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +19 -32
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +22 -31
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +22 -28
- data/vendor/faiss/faiss/gpu/GpuResources.cpp +14 -0
- data/vendor/faiss/faiss/gpu/GpuResources.h +16 -3
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +3 -3
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +32 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +1 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +311 -75
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +10 -0
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +3 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +2 -2
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +5 -4
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +116 -47
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +44 -13
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +0 -54
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +0 -76
- data/vendor/faiss/faiss/impl/DistanceComputer.h +64 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +123 -27
- data/vendor/faiss/faiss/impl/HNSW.h +19 -16
- data/vendor/faiss/faiss/impl/IDSelector.cpp +125 -0
- data/vendor/faiss/faiss/impl/IDSelector.h +135 -0
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +6 -28
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +6 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +77 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -0
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +383 -0
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +154 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +225 -145
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +29 -10
- data/vendor/faiss/faiss/impl/Quantizer.h +43 -0
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +192 -36
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +40 -20
- data/vendor/faiss/faiss/impl/ResultHandler.h +96 -0
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +97 -173
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +18 -18
- data/vendor/faiss/faiss/impl/index_read.cpp +240 -9
- data/vendor/faiss/faiss/impl/index_write.cpp +237 -5
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +6 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +56 -16
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +25 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +66 -25
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +75 -27
- data/vendor/faiss/faiss/index_factory.cpp +196 -7
- data/vendor/faiss/faiss/index_io.h +5 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +4 -1
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +27 -0
- data/vendor/faiss/faiss/python/python_callbacks.h +15 -0
- data/vendor/faiss/faiss/utils/Heap.h +31 -15
- data/vendor/faiss/faiss/utils/distances.cpp +380 -56
- data/vendor/faiss/faiss/utils/distances.h +113 -15
- data/vendor/faiss/faiss/utils/distances_simd.cpp +726 -6
- data/vendor/faiss/faiss/utils/extra_distances.cpp +12 -7
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +21 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +101 -0
- data/vendor/faiss/faiss/utils/fp16.h +11 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +54 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +0 -48
- data/vendor/faiss/faiss/utils/ordered_key_value.h +10 -0
- data/vendor/faiss/faiss/utils/quantize_lut.cpp +62 -0
- data/vendor/faiss/faiss/utils/quantize_lut.h +20 -0
- data/vendor/faiss/faiss/utils/random.cpp +53 -0
- data/vendor/faiss/faiss/utils/random.h +5 -0
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +4 -0
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +6 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +7 -2
- metadata +37 -3
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
#include <cmath>
|
|
15
15
|
|
|
16
16
|
#include <faiss/impl/AuxIndexStructures.h>
|
|
17
|
+
#include <faiss/impl/DistanceComputer.h>
|
|
17
18
|
#include <faiss/impl/FaissAssert.h>
|
|
18
19
|
#include <faiss/utils/utils.h>
|
|
19
20
|
|
|
@@ -89,18 +90,18 @@ void knn_extra_metrics_template(
|
|
|
89
90
|
}
|
|
90
91
|
|
|
91
92
|
template <class VD>
|
|
92
|
-
struct ExtraDistanceComputer :
|
|
93
|
+
struct ExtraDistanceComputer : FlatCodesDistanceComputer {
|
|
93
94
|
VD vd;
|
|
94
95
|
Index::idx_t nb;
|
|
95
96
|
const float* q;
|
|
96
97
|
const float* b;
|
|
97
98
|
|
|
98
|
-
float
|
|
99
|
-
return vd(
|
|
99
|
+
float symmetric_dis(idx_t i, idx_t j) final {
|
|
100
|
+
return vd(b + j * vd.d, b + i * vd.d);
|
|
100
101
|
}
|
|
101
102
|
|
|
102
|
-
float
|
|
103
|
-
return vd(
|
|
103
|
+
float distance_to_code(const uint8_t* code) final {
|
|
104
|
+
return vd(q, (float*)code);
|
|
104
105
|
}
|
|
105
106
|
|
|
106
107
|
ExtraDistanceComputer(
|
|
@@ -108,7 +109,11 @@ struct ExtraDistanceComputer : DistanceComputer {
|
|
|
108
109
|
const float* xb,
|
|
109
110
|
size_t nb,
|
|
110
111
|
const float* q = nullptr)
|
|
111
|
-
:
|
|
112
|
+
: FlatCodesDistanceComputer((uint8_t*)xb, vd.d * sizeof(float)),
|
|
113
|
+
vd(vd),
|
|
114
|
+
nb(nb),
|
|
115
|
+
q(q),
|
|
116
|
+
b(xb) {}
|
|
112
117
|
|
|
113
118
|
void set_query(const float* x) override {
|
|
114
119
|
q = x;
|
|
@@ -188,7 +193,7 @@ void knn_extra_metrics(
|
|
|
188
193
|
}
|
|
189
194
|
}
|
|
190
195
|
|
|
191
|
-
|
|
196
|
+
FlatCodesDistanceComputer* get_extra_distance_computer(
|
|
192
197
|
size_t d,
|
|
193
198
|
MetricType mt,
|
|
194
199
|
float metric_arg,
|
|
@@ -18,6 +18,8 @@
|
|
|
18
18
|
|
|
19
19
|
namespace faiss {
|
|
20
20
|
|
|
21
|
+
struct FlatCodesDistanceComputer;
|
|
22
|
+
|
|
21
23
|
void pairwise_extra_distances(
|
|
22
24
|
int64_t d,
|
|
23
25
|
int64_t nq,
|
|
@@ -43,7 +45,7 @@ void knn_extra_metrics(
|
|
|
43
45
|
|
|
44
46
|
/** get a DistanceComputer that refers to this type of distance and
|
|
45
47
|
* indexes a flat array of size nb */
|
|
46
|
-
|
|
48
|
+
FlatCodesDistanceComputer* get_extra_distance_computer(
|
|
47
49
|
size_t d,
|
|
48
50
|
MetricType mt,
|
|
49
51
|
float metric_arg,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <immintrin.h>
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
|
|
6
|
+
namespace faiss {
|
|
7
|
+
|
|
8
|
+
inline uint16_t encode_fp16(float x) {
|
|
9
|
+
__m128 xf = _mm_set1_ps(x);
|
|
10
|
+
__m128i xi =
|
|
11
|
+
_mm_cvtps_ph(xf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
|
|
12
|
+
return _mm_cvtsi128_si32(xi) & 0xffff;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
inline float decode_fp16(uint16_t x) {
|
|
16
|
+
__m128i xi = _mm_set1_epi16(x);
|
|
17
|
+
__m128 xf = _mm_cvtph_ps(xi);
|
|
18
|
+
return _mm_cvtss_f32(xf);
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
} // namespace faiss
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include <cstdint>
|
|
5
|
+
|
|
6
|
+
namespace faiss {
|
|
7
|
+
|
|
8
|
+
// non-intrinsic FP16 <-> FP32 code adapted from
|
|
9
|
+
// https://github.com/ispc/ispc/blob/master/stdlib.ispc
|
|
10
|
+
|
|
11
|
+
namespace {
|
|
12
|
+
|
|
13
|
+
inline float floatbits(uint32_t x) {
|
|
14
|
+
void* xptr = &x;
|
|
15
|
+
return *(float*)xptr;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
inline uint32_t intbits(float f) {
|
|
19
|
+
void* fptr = &f;
|
|
20
|
+
return *(uint32_t*)fptr;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
} // namespace
|
|
24
|
+
|
|
25
|
+
inline uint16_t encode_fp16(float f) {
|
|
26
|
+
// via Fabian "ryg" Giesen.
|
|
27
|
+
// https://gist.github.com/2156668
|
|
28
|
+
uint32_t sign_mask = 0x80000000u;
|
|
29
|
+
int32_t o;
|
|
30
|
+
|
|
31
|
+
uint32_t fint = intbits(f);
|
|
32
|
+
uint32_t sign = fint & sign_mask;
|
|
33
|
+
fint ^= sign;
|
|
34
|
+
|
|
35
|
+
// NOTE all the integer compares in this function can be safely
|
|
36
|
+
// compiled into signed compares since all operands are below
|
|
37
|
+
// 0x80000000. Important if you want fast straight SSE2 code (since
|
|
38
|
+
// there's no unsigned PCMPGTD).
|
|
39
|
+
|
|
40
|
+
// Inf or NaN (all exponent bits set)
|
|
41
|
+
// NaN->qNaN and Inf->Inf
|
|
42
|
+
// unconditional assignment here, will override with right value for
|
|
43
|
+
// the regular case below.
|
|
44
|
+
uint32_t f32infty = 255u << 23;
|
|
45
|
+
o = (fint > f32infty) ? 0x7e00u : 0x7c00u;
|
|
46
|
+
|
|
47
|
+
// (De)normalized number or zero
|
|
48
|
+
// update fint unconditionally to save the blending; we don't need it
|
|
49
|
+
// anymore for the Inf/NaN case anyway.
|
|
50
|
+
|
|
51
|
+
const uint32_t round_mask = ~0xfffu;
|
|
52
|
+
const uint32_t magic = 15u << 23;
|
|
53
|
+
|
|
54
|
+
// Shift exponent down, denormalize if necessary.
|
|
55
|
+
// NOTE This represents half-float denormals using single
|
|
56
|
+
// precision denormals. The main reason to do this is that
|
|
57
|
+
// there's no shift with per-lane variable shifts in SSE*, which
|
|
58
|
+
// we'd otherwise need. It has some funky side effects though:
|
|
59
|
+
// - This conversion will actually respect the FTZ (Flush To Zero)
|
|
60
|
+
// flag in MXCSR - if it's set, no half-float denormals will be
|
|
61
|
+
// generated. I'm honestly not sure whether this is good or
|
|
62
|
+
// bad. It's definitely interesting.
|
|
63
|
+
// - If the underlying HW doesn't support denormals (not an issue
|
|
64
|
+
// with Intel CPUs, but might be a problem on GPUs or PS3 SPUs),
|
|
65
|
+
// you will always get flush-to-zero behavior. This is bad,
|
|
66
|
+
// unless you're on a CPU where you don't care.
|
|
67
|
+
// - Denormals tend to be slow. FP32 denormals are rare in
|
|
68
|
+
// practice outside of things like recursive filters in DSP -
|
|
69
|
+
// not a typical half-float application. Whether FP16 denormals
|
|
70
|
+
// are rare in practice, I don't know. Whatever slow path your
|
|
71
|
+
// HW may or may not have for denormals, this may well hit it.
|
|
72
|
+
float fscale = floatbits(fint & round_mask) * floatbits(magic);
|
|
73
|
+
fscale = std::min(fscale, floatbits((31u << 23) - 0x1000u));
|
|
74
|
+
int32_t fint2 = intbits(fscale) - round_mask;
|
|
75
|
+
|
|
76
|
+
if (fint < f32infty)
|
|
77
|
+
o = fint2 >> 13; // Take the bits!
|
|
78
|
+
|
|
79
|
+
return (o | (sign >> 16));
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
inline float decode_fp16(uint16_t h) {
|
|
83
|
+
// https://gist.github.com/2144712
|
|
84
|
+
// Fabian "ryg" Giesen.
|
|
85
|
+
|
|
86
|
+
const uint32_t shifted_exp = 0x7c00u << 13; // exponent mask after shift
|
|
87
|
+
|
|
88
|
+
int32_t o = ((int32_t)(h & 0x7fffu)) << 13; // exponent/mantissa bits
|
|
89
|
+
int32_t exp = shifted_exp & o; // just the exponent
|
|
90
|
+
o += (int32_t)(127 - 15) << 23; // exponent adjust
|
|
91
|
+
|
|
92
|
+
int32_t infnan_val = o + ((int32_t)(128 - 16) << 23);
|
|
93
|
+
int32_t zerodenorm_val =
|
|
94
|
+
intbits(floatbits(o + (1u << 23)) - floatbits(113u << 23));
|
|
95
|
+
int32_t reg_val = (exp == 0) ? zerodenorm_val : o;
|
|
96
|
+
|
|
97
|
+
int32_t sign_bit = ((int32_t)(h & 0x8000u)) << 16;
|
|
98
|
+
return floatbits(((exp == shifted_exp) ? infnan_val : reg_val) | sign_bit);
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
} // namespace faiss
|
|
@@ -9,6 +9,60 @@ namespace faiss {
|
|
|
9
9
|
|
|
10
10
|
extern const uint8_t hamdis_tab_ham_bytes[256];
|
|
11
11
|
|
|
12
|
+
/* Elementary Hamming distance computation: unoptimized */
|
|
13
|
+
template <size_t nbits, typename T>
|
|
14
|
+
inline T hamming(const uint8_t* bs1, const uint8_t* bs2) {
|
|
15
|
+
const size_t nbytes = nbits / 8;
|
|
16
|
+
size_t i;
|
|
17
|
+
T h = 0;
|
|
18
|
+
for (i = 0; i < nbytes; i++) {
|
|
19
|
+
h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
|
|
20
|
+
}
|
|
21
|
+
return h;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/* Hamming distances for multiples of 64 bits */
|
|
25
|
+
template <size_t nbits>
|
|
26
|
+
inline hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
|
|
27
|
+
const size_t nwords = nbits / 64;
|
|
28
|
+
size_t i;
|
|
29
|
+
hamdis_t h = 0;
|
|
30
|
+
for (i = 0; i < nwords; i++) {
|
|
31
|
+
h += popcount64(bs1[i] ^ bs2[i]);
|
|
32
|
+
}
|
|
33
|
+
return h;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/* specialized (optimized) functions */
|
|
37
|
+
template <>
|
|
38
|
+
inline hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
|
|
39
|
+
return popcount64(pa[0] ^ pb[0]);
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
template <>
|
|
43
|
+
inline hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
|
|
44
|
+
return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
template <>
|
|
48
|
+
inline hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
|
|
49
|
+
return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
|
|
50
|
+
popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/* Hamming distances for multiple of 64 bits */
|
|
54
|
+
inline hamdis_t hamming(
|
|
55
|
+
const uint64_t* bs1,
|
|
56
|
+
const uint64_t* bs2,
|
|
57
|
+
size_t nwords) {
|
|
58
|
+
hamdis_t h = 0;
|
|
59
|
+
for (size_t i = 0; i < nwords; i++) {
|
|
60
|
+
h += popcount64(bs1[i] ^ bs2[i]);
|
|
61
|
+
}
|
|
62
|
+
return h;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// BitstringWriter and BitstringReader functions
|
|
12
66
|
inline BitstringWriter::BitstringWriter(uint8_t* code, size_t code_size)
|
|
13
67
|
: code(code), code_size(code_size), i(0) {
|
|
14
68
|
memset(code, 0, code_size);
|
|
@@ -56,54 +56,6 @@ const uint8_t hamdis_tab_ham_bytes[256] = {
|
|
|
56
56
|
4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
|
57
57
|
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
|
|
58
58
|
|
|
59
|
-
/* Elementary Hamming distance computation: unoptimized */
|
|
60
|
-
template <size_t nbits, typename T>
|
|
61
|
-
T hamming(const uint8_t* bs1, const uint8_t* bs2) {
|
|
62
|
-
const size_t nbytes = nbits / 8;
|
|
63
|
-
size_t i;
|
|
64
|
-
T h = 0;
|
|
65
|
-
for (i = 0; i < nbytes; i++)
|
|
66
|
-
h += (T)hamdis_tab_ham_bytes[bs1[i] ^ bs2[i]];
|
|
67
|
-
return h;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
/* Hamming distances for multiples of 64 bits */
|
|
71
|
-
template <size_t nbits>
|
|
72
|
-
hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2) {
|
|
73
|
-
const size_t nwords = nbits / 64;
|
|
74
|
-
size_t i;
|
|
75
|
-
hamdis_t h = 0;
|
|
76
|
-
for (i = 0; i < nwords; i++)
|
|
77
|
-
h += popcount64(bs1[i] ^ bs2[i]);
|
|
78
|
-
return h;
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
/* specialized (optimized) functions */
|
|
82
|
-
template <>
|
|
83
|
-
hamdis_t hamming<64>(const uint64_t* pa, const uint64_t* pb) {
|
|
84
|
-
return popcount64(pa[0] ^ pb[0]);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
template <>
|
|
88
|
-
hamdis_t hamming<128>(const uint64_t* pa, const uint64_t* pb) {
|
|
89
|
-
return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]);
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
template <>
|
|
93
|
-
hamdis_t hamming<256>(const uint64_t* pa, const uint64_t* pb) {
|
|
94
|
-
return popcount64(pa[0] ^ pb[0]) + popcount64(pa[1] ^ pb[1]) +
|
|
95
|
-
popcount64(pa[2] ^ pb[2]) + popcount64(pa[3] ^ pb[3]);
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
/* Hamming distances for multiple of 64 bits */
|
|
99
|
-
hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords) {
|
|
100
|
-
size_t i;
|
|
101
|
-
hamdis_t h = 0;
|
|
102
|
-
for (i = 0; i < nwords; i++)
|
|
103
|
-
h += popcount64(bs1[i] ^ bs2[i]);
|
|
104
|
-
return h;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
59
|
template <size_t nbits>
|
|
108
60
|
void hammings(
|
|
109
61
|
const uint64_t* bs1,
|
|
@@ -46,6 +46,11 @@ struct CMin {
|
|
|
46
46
|
inline static bool cmp(T a, T b) {
|
|
47
47
|
return a < b;
|
|
48
48
|
}
|
|
49
|
+
// Similar to cmp(), but also breaks ties
|
|
50
|
+
// by comparing the second pair of arguments.
|
|
51
|
+
inline static bool cmp2(T a1, T b1, TI a2, TI b2) {
|
|
52
|
+
return (a1 < b1) || ((a1 == b1) && (a2 < b2));
|
|
53
|
+
}
|
|
49
54
|
inline static T neutral() {
|
|
50
55
|
return std::numeric_limits<T>::lowest();
|
|
51
56
|
}
|
|
@@ -64,6 +69,11 @@ struct CMax {
|
|
|
64
69
|
inline static bool cmp(T a, T b) {
|
|
65
70
|
return a > b;
|
|
66
71
|
}
|
|
72
|
+
// Similar to cmp(), but also breaks ties
|
|
73
|
+
// by comparing the second pair of arguments.
|
|
74
|
+
inline static bool cmp2(T a1, T b1, TI a2, TI b2) {
|
|
75
|
+
return (a1 > b1) || ((a1 == b1) && (a2 > b2));
|
|
76
|
+
}
|
|
67
77
|
inline static T neutral() {
|
|
68
78
|
return std::numeric_limits<T>::max();
|
|
69
79
|
}
|
|
@@ -284,6 +284,68 @@ void quantize_LUT_and_bias(
|
|
|
284
284
|
*b_out = b;
|
|
285
285
|
}
|
|
286
286
|
|
|
287
|
+
void aq_quantize_LUT_and_bias(
|
|
288
|
+
size_t nprobe,
|
|
289
|
+
size_t M,
|
|
290
|
+
size_t ksub,
|
|
291
|
+
const float* LUT,
|
|
292
|
+
const float* bias,
|
|
293
|
+
size_t M_norm,
|
|
294
|
+
int norm_scale,
|
|
295
|
+
uint8_t* LUTq,
|
|
296
|
+
size_t M2,
|
|
297
|
+
uint16_t* biasq,
|
|
298
|
+
float* a_out,
|
|
299
|
+
float* b_out) {
|
|
300
|
+
float a, b;
|
|
301
|
+
std::vector<float> mins(M);
|
|
302
|
+
float max_span_LUT = -HUGE_VAL, max_span_dis;
|
|
303
|
+
float bias_min = tab_min(bias, nprobe);
|
|
304
|
+
float bias_max = tab_max(bias, nprobe);
|
|
305
|
+
max_span_dis = bias_max - bias_min;
|
|
306
|
+
b = 0;
|
|
307
|
+
for (int i = 0; i < M; i++) {
|
|
308
|
+
mins[i] = tab_min(LUT + i * ksub, ksub);
|
|
309
|
+
float span = tab_max(LUT + i * ksub, ksub) - mins[i];
|
|
310
|
+
max_span_LUT = std::max(max_span_LUT, span);
|
|
311
|
+
max_span_dis += (i >= M - M_norm ? span * norm_scale : span);
|
|
312
|
+
b += mins[i];
|
|
313
|
+
}
|
|
314
|
+
a = std::min(255 / max_span_LUT, 65535 / max_span_dis);
|
|
315
|
+
b += bias_min;
|
|
316
|
+
|
|
317
|
+
for (int i = 0; i < M; i++) {
|
|
318
|
+
round_tab(LUT + i * ksub, ksub, a, mins[i], LUTq + i * ksub);
|
|
319
|
+
}
|
|
320
|
+
memset(LUTq + M * ksub, 0, ksub * (M2 - M));
|
|
321
|
+
round_tab(bias, nprobe, a, bias_min, biasq);
|
|
322
|
+
|
|
323
|
+
*a_out = a;
|
|
324
|
+
*b_out = b;
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
float aq_estimate_norm_scale(
|
|
328
|
+
size_t M,
|
|
329
|
+
size_t ksub,
|
|
330
|
+
size_t M_norm,
|
|
331
|
+
const float* LUT) {
|
|
332
|
+
float max_span_LUT = -HUGE_VAL;
|
|
333
|
+
for (int i = 0; i < M - M_norm; i++) {
|
|
334
|
+
float min = tab_min(LUT + i * ksub, ksub);
|
|
335
|
+
float span = tab_max(LUT + i * ksub, ksub) - min;
|
|
336
|
+
max_span_LUT = std::max(max_span_LUT, span);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
float max_span_LUT_norm = -HUGE_VAL;
|
|
340
|
+
for (int i = M - M_norm; i < M; i++) {
|
|
341
|
+
float min = tab_min(LUT + i * ksub, ksub);
|
|
342
|
+
float span = tab_max(LUT + i * ksub, ksub) - min;
|
|
343
|
+
max_span_LUT_norm = std::max(max_span_LUT_norm, span);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
return max_span_LUT_norm / max_span_LUT;
|
|
347
|
+
}
|
|
348
|
+
|
|
287
349
|
} // namespace quantize_lut
|
|
288
350
|
|
|
289
351
|
} // namespace faiss
|
|
@@ -77,6 +77,26 @@ void quantize_LUT_and_bias(
|
|
|
77
77
|
float* a_out = nullptr,
|
|
78
78
|
float* b_out = nullptr);
|
|
79
79
|
|
|
80
|
+
void aq_quantize_LUT_and_bias(
|
|
81
|
+
size_t nprobe,
|
|
82
|
+
size_t M,
|
|
83
|
+
size_t ksub,
|
|
84
|
+
const float* LUT,
|
|
85
|
+
const float* bias,
|
|
86
|
+
size_t M_norm,
|
|
87
|
+
int norm_scale,
|
|
88
|
+
uint8_t* LUTq,
|
|
89
|
+
size_t M2,
|
|
90
|
+
uint16_t* biasq,
|
|
91
|
+
float* a_out,
|
|
92
|
+
float* b_out);
|
|
93
|
+
|
|
94
|
+
float aq_estimate_norm_scale(
|
|
95
|
+
size_t M,
|
|
96
|
+
size_t ksub,
|
|
97
|
+
size_t M_norm,
|
|
98
|
+
const float* LUT);
|
|
99
|
+
|
|
80
100
|
} // namespace quantize_lut
|
|
81
101
|
|
|
82
102
|
} // namespace faiss
|
|
@@ -9,6 +9,23 @@
|
|
|
9
9
|
|
|
10
10
|
#include <faiss/utils/random.h>
|
|
11
11
|
|
|
12
|
+
extern "C" {
|
|
13
|
+
int sgemm_(
|
|
14
|
+
const char* transa,
|
|
15
|
+
const char* transb,
|
|
16
|
+
FINTEGER* m,
|
|
17
|
+
FINTEGER* n,
|
|
18
|
+
FINTEGER* k,
|
|
19
|
+
const float* alpha,
|
|
20
|
+
const float* a,
|
|
21
|
+
FINTEGER* lda,
|
|
22
|
+
const float* b,
|
|
23
|
+
FINTEGER* ldb,
|
|
24
|
+
float* beta,
|
|
25
|
+
float* c,
|
|
26
|
+
FINTEGER* ldc);
|
|
27
|
+
}
|
|
28
|
+
|
|
12
29
|
namespace faiss {
|
|
13
30
|
|
|
14
31
|
/**************************************************
|
|
@@ -165,4 +182,40 @@ void byte_rand(uint8_t* x, size_t n, int64_t seed) {
|
|
|
165
182
|
}
|
|
166
183
|
}
|
|
167
184
|
|
|
185
|
+
void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed) {
|
|
186
|
+
size_t d1 = 10;
|
|
187
|
+
std::vector<float> x1(n * d1);
|
|
188
|
+
float_randn(x1.data(), x1.size(), seed);
|
|
189
|
+
std::vector<float> rot(d1 * d);
|
|
190
|
+
float_rand(rot.data(), rot.size(), seed + 1);
|
|
191
|
+
|
|
192
|
+
{ //
|
|
193
|
+
FINTEGER di = d, d1i = d1, ni = n;
|
|
194
|
+
float one = 1.0, zero = 0.0;
|
|
195
|
+
sgemm_("Not transposed",
|
|
196
|
+
"Not transposed", // natural order
|
|
197
|
+
&di,
|
|
198
|
+
&ni,
|
|
199
|
+
&d1i,
|
|
200
|
+
&one,
|
|
201
|
+
rot.data(),
|
|
202
|
+
&di, // rotation matrix
|
|
203
|
+
x1.data(),
|
|
204
|
+
&d1i, // second term
|
|
205
|
+
&zero,
|
|
206
|
+
x,
|
|
207
|
+
&di);
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
std::vector<float> scales(d);
|
|
211
|
+
float_rand(scales.data(), d, seed + 2);
|
|
212
|
+
|
|
213
|
+
#pragma omp parallel for if (n * d > 10000)
|
|
214
|
+
for (int64_t i = 0; i < n; i++) {
|
|
215
|
+
for (size_t j = 0; j < d; j++) {
|
|
216
|
+
x[i * d + j] = sinf(x[i * d + j] * (scales[j] * 4 + 0.1));
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
|
|
168
221
|
} // namespace faiss
|
|
@@ -54,4 +54,9 @@ void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);
|
|
|
54
54
|
/* random permutation */
|
|
55
55
|
void rand_perm(int* perm, size_t n, int64_t seed);
|
|
56
56
|
|
|
57
|
+
/* Random set of vectors with intrinsic dimensionality 10 that is harder to
|
|
58
|
+
* index than a subspace of dim 10 but easier than uniform data in dimension d
|
|
59
|
+
* */
|
|
60
|
+
void rand_smooth_vectors(size_t n, size_t d, float* x, int64_t seed);
|
|
61
|
+
|
|
57
62
|
} // namespace faiss
|
|
@@ -111,6 +111,10 @@ struct simd16uint16 : simd256bit {
|
|
|
111
111
|
i = _mm256_set1_epi16((short)x);
|
|
112
112
|
}
|
|
113
113
|
|
|
114
|
+
simd16uint16 operator*(const simd16uint16& other) const {
|
|
115
|
+
return simd16uint16(_mm256_mullo_epi16(i, other.i));
|
|
116
|
+
}
|
|
117
|
+
|
|
114
118
|
// shift must be known at compile time
|
|
115
119
|
simd16uint16 operator>>(const int shift) const {
|
|
116
120
|
return simd16uint16(_mm256_srli_epi16(i, shift));
|
|
@@ -120,6 +120,11 @@ struct simd16uint16 : simd256bit {
|
|
|
120
120
|
}
|
|
121
121
|
}
|
|
122
122
|
|
|
123
|
+
simd16uint16 operator*(const simd16uint16& other) const {
|
|
124
|
+
return binary_func(
|
|
125
|
+
*this, other, [](uint16_t a, uint16_t b) { return a * b; });
|
|
126
|
+
}
|
|
127
|
+
|
|
123
128
|
// shift must be known at compile time
|
|
124
129
|
simd16uint16 operator>>(const int shift) const {
|
|
125
130
|
return unary_func(*this, [shift](uint16_t a) { return a >> shift; });
|
|
@@ -433,7 +438,7 @@ struct simd8uint32 : simd256bit {
|
|
|
433
438
|
|
|
434
439
|
explicit simd8uint32(const simd256bit& x) : simd256bit(x) {}
|
|
435
440
|
|
|
436
|
-
explicit simd8uint32(const
|
|
441
|
+
explicit simd8uint32(const uint32_t* x) : simd256bit((const void*)x) {}
|
|
437
442
|
|
|
438
443
|
std::string elements_to_string(const char* fmt) const {
|
|
439
444
|
char res[1000], *ptr = res;
|
|
@@ -260,6 +260,11 @@ struct simd16uint16 {
|
|
|
260
260
|
detail::simdlib::set1(data, &vdupq_n_u16, x);
|
|
261
261
|
}
|
|
262
262
|
|
|
263
|
+
simd16uint16 operator*(const simd16uint16& other) const {
|
|
264
|
+
return simd16uint16{
|
|
265
|
+
detail::simdlib::binary_func(data, other.data, &vmulq_u16)};
|
|
266
|
+
}
|
|
267
|
+
|
|
263
268
|
// shift must be known at compile time
|
|
264
269
|
simd16uint16 operator>>(const int shift) const {
|
|
265
270
|
switch (shift) {
|
|
@@ -641,8 +646,8 @@ inline simd32uint8 blendv(
|
|
|
641
646
|
const uint8x16x2_t msb_mask = {
|
|
642
647
|
vtstq_u8(mask.data.val[0], msb), vtstq_u8(mask.data.val[1], msb)};
|
|
643
648
|
const uint8x16x2_t selected = {
|
|
644
|
-
vbslq_u8(msb_mask.val[0],
|
|
645
|
-
vbslq_u8(msb_mask.val[1],
|
|
649
|
+
vbslq_u8(msb_mask.val[0], b.data.val[0], a.data.val[0]),
|
|
650
|
+
vbslq_u8(msb_mask.val[1], b.data.val[1], a.data.val[1])};
|
|
646
651
|
return simd32uint8{selected};
|
|
647
652
|
}
|
|
648
653
|
|