faiss 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +35 -4
- data/vendor/faiss/faiss/Clustering.h +10 -1
- data/vendor/faiss/faiss/IVFlib.cpp +4 -1
- data/vendor/faiss/faiss/Index.h +21 -6
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexFastScan.cpp +22 -4
- data/vendor/faiss/faiss/IndexFlat.cpp +11 -7
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +159 -5
- data/vendor/faiss/faiss/IndexFlatCodes.h +20 -3
- data/vendor/faiss/faiss/IndexHNSW.cpp +143 -90
- data/vendor/faiss/faiss/IndexHNSW.h +52 -3
- data/vendor/faiss/faiss/IndexIVF.cpp +3 -3
- data/vendor/faiss/faiss/IndexIVF.h +9 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +15 -0
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.h +3 -0
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +130 -57
- data/vendor/faiss/faiss/IndexIVFFastScan.h +14 -7
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +1 -3
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +21 -2
- data/vendor/faiss/faiss/IndexLattice.cpp +1 -19
- data/vendor/faiss/faiss/IndexLattice.h +3 -22
- data/vendor/faiss/faiss/IndexNNDescent.cpp +0 -29
- data/vendor/faiss/faiss/IndexNNDescent.h +1 -1
- data/vendor/faiss/faiss/IndexNSG.h +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.cpp +56 -0
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +49 -0
- data/vendor/faiss/faiss/IndexPreTransform.h +1 -1
- data/vendor/faiss/faiss/IndexRefine.cpp +5 -5
- data/vendor/faiss/faiss/IndexScalarQuantizer.cpp +3 -1
- data/vendor/faiss/faiss/MetricType.h +7 -2
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +95 -17
- data/vendor/faiss/faiss/cppcontrib/factory_tools.cpp +152 -0
- data/vendor/faiss/faiss/cppcontrib/factory_tools.h +24 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +83 -30
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +36 -4
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +6 -0
- data/vendor/faiss/faiss/gpu/GpuFaissAssert.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +2 -8
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +282 -0
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +6 -0
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +2 -0
- data/vendor/faiss/faiss/gpu/StandardGpuResources.cpp +25 -0
- data/vendor/faiss/faiss/gpu/impl/InterleavedCodes.cpp +26 -21
- data/vendor/faiss/faiss/gpu/perf/PerfClustering.cpp +6 -0
- data/vendor/faiss/faiss/gpu/test/TestCodePacking.cpp +8 -5
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +65 -0
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/gpu/utils/Timer.cpp +4 -1
- data/vendor/faiss/faiss/gpu/utils/Timer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +25 -0
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +9 -1
- data/vendor/faiss/faiss/impl/DistanceComputer.h +46 -0
- data/vendor/faiss/faiss/impl/FaissAssert.h +4 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +358 -190
- data/vendor/faiss/faiss/impl/HNSW.h +43 -22
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +8 -8
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +34 -0
- data/vendor/faiss/faiss/impl/NNDescent.cpp +13 -8
- data/vendor/faiss/faiss/impl/NSG.cpp +0 -29
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +1 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +5 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +151 -32
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +719 -102
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +3 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +5 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx512.h +248 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +29 -15
- data/vendor/faiss/faiss/impl/index_read_utils.h +37 -0
- data/vendor/faiss/faiss/impl/index_write.cpp +28 -10
- data/vendor/faiss/faiss/impl/io.cpp +13 -5
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/io_macros.h +6 -0
- data/vendor/faiss/faiss/impl/platform_macros.h +22 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +11 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_1.cpp +1 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +448 -1
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.cpp +5 -5
- data/vendor/faiss/faiss/impl/residual_quantizer_encode_steps.h +1 -1
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +143 -59
- data/vendor/faiss/faiss/index_factory.cpp +31 -13
- data/vendor/faiss/faiss/index_io.h +12 -5
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +28 -8
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +3 -0
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +9 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +55 -17
- data/vendor/faiss/faiss/invlists/InvertedLists.h +18 -9
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +21 -6
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +2 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +3 -3
- data/vendor/faiss/faiss/utils/Heap.h +105 -0
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +342 -0
- data/vendor/faiss/faiss/utils/NeuralNet.h +147 -0
- data/vendor/faiss/faiss/utils/bf16.h +36 -0
- data/vendor/faiss/faiss/utils/distances.cpp +58 -88
- data/vendor/faiss/faiss/utils/distances.h +5 -5
- data/vendor/faiss/faiss/utils/distances_simd.cpp +997 -9
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +70 -0
- data/vendor/faiss/faiss/utils/extra_distances.cpp +85 -137
- data/vendor/faiss/faiss/utils/extra_distances.h +3 -2
- data/vendor/faiss/faiss/utils/hamming.cpp +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +4 -1
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +2 -1
- data/vendor/faiss/faiss/utils/random.cpp +43 -0
- data/vendor/faiss/faiss/utils/random.h +25 -0
- data/vendor/faiss/faiss/utils/simdlib.h +10 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +296 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +5 -2
- data/vendor/faiss/faiss/utils/simdlib_ppc64.h +1084 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +176 -0
- data/vendor/faiss/faiss/utils/utils.cpp +10 -3
- data/vendor/faiss/faiss/utils/utils.h +3 -0
- metadata +16 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +0 -102
|
@@ -32,6 +32,9 @@ struct ScalarQuantizer : Quantizer {
|
|
|
32
32
|
QT_fp16,
|
|
33
33
|
QT_8bit_direct, ///< fast indexing of uint8s
|
|
34
34
|
QT_6bit, ///< 6 bits per component
|
|
35
|
+
QT_bf16,
|
|
36
|
+
QT_8bit_direct_signed, ///< fast indexing of signed int8s ranging from
|
|
37
|
+
///< [-128 to 127]
|
|
35
38
|
};
|
|
36
39
|
|
|
37
40
|
QuantizerType qtype = QT_8bit;
|
|
@@ -16,6 +16,11 @@
|
|
|
16
16
|
#include <faiss/impl/ProductQuantizer.h>
|
|
17
17
|
#include <faiss/impl/code_distance/code_distance-generic.h>
|
|
18
18
|
|
|
19
|
+
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78782
|
|
20
|
+
#if defined(__GNUC__) && __GNUC__ < 9
|
|
21
|
+
#define _mm_loadu_si64(x) (_mm_loadl_epi64((__m128i_u*)x))
|
|
22
|
+
#endif
|
|
23
|
+
|
|
19
24
|
namespace {
|
|
20
25
|
|
|
21
26
|
inline float horizontal_sum(const __m128 v) {
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
10
|
+
#ifdef __AVX512F__
|
|
11
|
+
|
|
12
|
+
#include <immintrin.h>
|
|
13
|
+
|
|
14
|
+
#include <type_traits>
|
|
15
|
+
|
|
16
|
+
#include <faiss/impl/ProductQuantizer.h>
|
|
17
|
+
#include <faiss/impl/code_distance/code_distance-generic.h>
|
|
18
|
+
|
|
19
|
+
namespace faiss {
|
|
20
|
+
|
|
21
|
+
// According to experiments, the AVX-512 version may be SLOWER than
|
|
22
|
+
// the AVX2 version, which is somewhat unexpected.
|
|
23
|
+
// This version is not used for now, but it may be used later.
|
|
24
|
+
//
|
|
25
|
+
// TODO: test for AMD CPUs.
|
|
26
|
+
|
|
27
|
+
template <typename PQDecoderT>
|
|
28
|
+
typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, float>::
|
|
29
|
+
type inline distance_single_code_avx512(
|
|
30
|
+
// number of subquantizers
|
|
31
|
+
const size_t M,
|
|
32
|
+
// number of bits per quantization index
|
|
33
|
+
const size_t nbits,
|
|
34
|
+
// precomputed distances, layout (M, ksub)
|
|
35
|
+
const float* sim_table,
|
|
36
|
+
const uint8_t* code) {
|
|
37
|
+
// default implementation
|
|
38
|
+
return distance_single_code_generic<PQDecoderT>(M, nbits, sim_table, code);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
template <typename PQDecoderT>
|
|
42
|
+
typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, float>::
|
|
43
|
+
type inline distance_single_code_avx512(
|
|
44
|
+
// number of subquantizers
|
|
45
|
+
const size_t M,
|
|
46
|
+
// number of bits per quantization index
|
|
47
|
+
const size_t nbits,
|
|
48
|
+
// precomputed distances, layout (M, ksub)
|
|
49
|
+
const float* sim_table,
|
|
50
|
+
const uint8_t* code0) {
|
|
51
|
+
float result0 = 0;
|
|
52
|
+
constexpr size_t ksub = 1 << 8;
|
|
53
|
+
|
|
54
|
+
size_t m = 0;
|
|
55
|
+
const size_t pqM16 = M / 16;
|
|
56
|
+
|
|
57
|
+
constexpr intptr_t N = 1;
|
|
58
|
+
|
|
59
|
+
const float* tab = sim_table;
|
|
60
|
+
|
|
61
|
+
if (pqM16 > 0) {
|
|
62
|
+
// process 16 values per loop
|
|
63
|
+
const __m512i vksub = _mm512_set1_epi32(ksub);
|
|
64
|
+
__m512i offsets_0 = _mm512_setr_epi32(
|
|
65
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
66
|
+
offsets_0 = _mm512_mullo_epi32(offsets_0, vksub);
|
|
67
|
+
|
|
68
|
+
// accumulators of partial sums
|
|
69
|
+
__m512 partialSums[N];
|
|
70
|
+
for (intptr_t j = 0; j < N; j++) {
|
|
71
|
+
partialSums[j] = _mm512_setzero_ps();
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// loop
|
|
75
|
+
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
76
|
+
// load 16 uint8 values
|
|
77
|
+
__m128i mm1[N];
|
|
78
|
+
mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
|
|
79
|
+
|
|
80
|
+
// process all 16 codes loaded above
|
|
81
|
+
for (intptr_t j = 0; j < N; j++) {
|
|
82
|
+
const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
|
|
83
|
+
|
|
84
|
+
// add offsets
|
|
85
|
+
const __m512i indices_to_read_from =
|
|
86
|
+
_mm512_add_epi32(idx1, offsets_0);
|
|
87
|
+
|
|
88
|
+
// gather 16 values, similar to 16 operations of tab[idx]
|
|
89
|
+
__m512 collected = _mm512_i32gather_ps(
|
|
90
|
+
indices_to_read_from, tab, sizeof(float));
|
|
91
|
+
|
|
92
|
+
// collect partial sums
|
|
93
|
+
partialSums[j] = _mm512_add_ps(partialSums[j], collected);
|
|
94
|
+
}
|
|
95
|
+
tab += ksub * 16;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// horizontal sum for partialSum
|
|
99
|
+
result0 += _mm512_reduce_add_ps(partialSums[0]);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// scalar tail: subquantizers not covered by the 16-wide loop above
|
|
103
|
+
if (m < M) {
|
|
104
|
+
// process leftovers
|
|
105
|
+
PQDecoder8 decoder0(code0 + m, nbits);
|
|
106
|
+
for (; m < M; m++) {
|
|
107
|
+
result0 += tab[decoder0.decode()];
|
|
108
|
+
tab += ksub;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
return result0;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
template <typename PQDecoderT>
|
|
116
|
+
typename std::enable_if<!std::is_same<PQDecoderT, PQDecoder8>::value, void>::
|
|
117
|
+
type
|
|
118
|
+
distance_four_codes_avx512(
|
|
119
|
+
// number of subquantizers
|
|
120
|
+
const size_t M,
|
|
121
|
+
// number of bits per quantization index
|
|
122
|
+
const size_t nbits,
|
|
123
|
+
// precomputed distances, layout (M, ksub)
|
|
124
|
+
const float* sim_table,
|
|
125
|
+
// codes
|
|
126
|
+
const uint8_t* __restrict code0,
|
|
127
|
+
const uint8_t* __restrict code1,
|
|
128
|
+
const uint8_t* __restrict code2,
|
|
129
|
+
const uint8_t* __restrict code3,
|
|
130
|
+
// computed distances
|
|
131
|
+
float& result0,
|
|
132
|
+
float& result1,
|
|
133
|
+
float& result2,
|
|
134
|
+
float& result3) {
|
|
135
|
+
distance_four_codes_generic<PQDecoderT>(
|
|
136
|
+
M,
|
|
137
|
+
nbits,
|
|
138
|
+
sim_table,
|
|
139
|
+
code0,
|
|
140
|
+
code1,
|
|
141
|
+
code2,
|
|
142
|
+
code3,
|
|
143
|
+
result0,
|
|
144
|
+
result1,
|
|
145
|
+
result2,
|
|
146
|
+
result3);
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
// Combines 4 operations of distance_single_code()
|
|
150
|
+
template <typename PQDecoderT>
|
|
151
|
+
typename std::enable_if<std::is_same<PQDecoderT, PQDecoder8>::value, void>::type
|
|
152
|
+
distance_four_codes_avx512(
|
|
153
|
+
// number of subquantizers
|
|
154
|
+
const size_t M,
|
|
155
|
+
// number of bits per quantization index
|
|
156
|
+
const size_t nbits,
|
|
157
|
+
// precomputed distances, layout (M, ksub)
|
|
158
|
+
const float* sim_table,
|
|
159
|
+
// codes
|
|
160
|
+
const uint8_t* __restrict code0,
|
|
161
|
+
const uint8_t* __restrict code1,
|
|
162
|
+
const uint8_t* __restrict code2,
|
|
163
|
+
const uint8_t* __restrict code3,
|
|
164
|
+
// computed distances
|
|
165
|
+
float& result0,
|
|
166
|
+
float& result1,
|
|
167
|
+
float& result2,
|
|
168
|
+
float& result3) {
|
|
169
|
+
result0 = 0;
|
|
170
|
+
result1 = 0;
|
|
171
|
+
result2 = 0;
|
|
172
|
+
result3 = 0;
|
|
173
|
+
constexpr size_t ksub = 1 << 8;
|
|
174
|
+
|
|
175
|
+
size_t m = 0;
|
|
176
|
+
const size_t pqM16 = M / 16;
|
|
177
|
+
|
|
178
|
+
constexpr intptr_t N = 4;
|
|
179
|
+
|
|
180
|
+
const float* tab = sim_table;
|
|
181
|
+
|
|
182
|
+
if (pqM16 > 0) {
|
|
183
|
+
// process 16 values per loop
|
|
184
|
+
const __m512i vksub = _mm512_set1_epi32(ksub);
|
|
185
|
+
__m512i offsets_0 = _mm512_setr_epi32(
|
|
186
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
|
187
|
+
offsets_0 = _mm512_mullo_epi32(offsets_0, vksub);
|
|
188
|
+
|
|
189
|
+
// accumulators of partial sums
|
|
190
|
+
__m512 partialSums[N];
|
|
191
|
+
for (intptr_t j = 0; j < N; j++) {
|
|
192
|
+
partialSums[j] = _mm512_setzero_ps();
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// loop
|
|
196
|
+
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
197
|
+
// load 16 uint8 values
|
|
198
|
+
__m128i mm1[N];
|
|
199
|
+
mm1[0] = _mm_loadu_si128((const __m128i_u*)(code0 + m));
|
|
200
|
+
mm1[1] = _mm_loadu_si128((const __m128i_u*)(code1 + m));
|
|
201
|
+
mm1[2] = _mm_loadu_si128((const __m128i_u*)(code2 + m));
|
|
202
|
+
mm1[3] = _mm_loadu_si128((const __m128i_u*)(code3 + m));
|
|
203
|
+
|
|
204
|
+
// process the 16 codes loaded from each of the 4 inputs
|
|
205
|
+
for (intptr_t j = 0; j < N; j++) {
|
|
206
|
+
const __m512i idx1 = _mm512_cvtepu8_epi32(mm1[j]);
|
|
207
|
+
|
|
208
|
+
// add offsets
|
|
209
|
+
const __m512i indices_to_read_from =
|
|
210
|
+
_mm512_add_epi32(idx1, offsets_0);
|
|
211
|
+
|
|
212
|
+
// gather 16 values, similar to 16 operations of tab[idx]
|
|
213
|
+
__m512 collected = _mm512_i32gather_ps(
|
|
214
|
+
indices_to_read_from, tab, sizeof(float));
|
|
215
|
+
|
|
216
|
+
// collect partial sums
|
|
217
|
+
partialSums[j] = _mm512_add_ps(partialSums[j], collected);
|
|
218
|
+
}
|
|
219
|
+
tab += ksub * 16;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
// horizontal sum for partialSum
|
|
223
|
+
result0 += _mm512_reduce_add_ps(partialSums[0]);
|
|
224
|
+
result1 += _mm512_reduce_add_ps(partialSums[1]);
|
|
225
|
+
result2 += _mm512_reduce_add_ps(partialSums[2]);
|
|
226
|
+
result3 += _mm512_reduce_add_ps(partialSums[3]);
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// scalar tail: subquantizers not covered by the 16-wide loop above
|
|
230
|
+
if (m < M) {
|
|
231
|
+
// process leftovers
|
|
232
|
+
PQDecoder8 decoder0(code0 + m, nbits);
|
|
233
|
+
PQDecoder8 decoder1(code1 + m, nbits);
|
|
234
|
+
PQDecoder8 decoder2(code2 + m, nbits);
|
|
235
|
+
PQDecoder8 decoder3(code3 + m, nbits);
|
|
236
|
+
for (; m < M; m++) {
|
|
237
|
+
result0 += tab[decoder0.decode()];
|
|
238
|
+
result1 += tab[decoder1.decode()];
|
|
239
|
+
result2 += tab[decoder2.decode()];
|
|
240
|
+
result3 += tab[decoder3.decode()];
|
|
241
|
+
tab += ksub;
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
} // namespace faiss
|
|
247
|
+
|
|
248
|
+
#endif
|
|
@@ -5,8 +5,7 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
#include <faiss/impl/index_read_utils.h>
|
|
10
9
|
#include <faiss/index_io.h>
|
|
11
10
|
|
|
12
11
|
#include <faiss/impl/io_macros.h>
|
|
@@ -63,7 +62,7 @@ namespace faiss {
|
|
|
63
62
|
* Read
|
|
64
63
|
**************************************************************/
|
|
65
64
|
|
|
66
|
-
|
|
65
|
+
void read_index_header(Index* idx, IOReader* f) {
|
|
67
66
|
READ1(idx->d);
|
|
68
67
|
READ1(idx->ntotal);
|
|
69
68
|
idx_t dummy;
|
|
@@ -232,7 +231,7 @@ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
|
|
|
232
231
|
}
|
|
233
232
|
}
|
|
234
233
|
|
|
235
|
-
|
|
234
|
+
void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
|
|
236
235
|
InvertedLists* ils = read_InvertedLists(f, io_flags);
|
|
237
236
|
if (ils) {
|
|
238
237
|
FAISS_THROW_IF_NOT(ils->nlist == ivf->nlist);
|
|
@@ -244,7 +243,7 @@ static void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags) {
|
|
|
244
243
|
ivf->own_invlists = true;
|
|
245
244
|
}
|
|
246
245
|
|
|
247
|
-
|
|
246
|
+
void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f) {
|
|
248
247
|
READ1(pq->d);
|
|
249
248
|
READ1(pq->M);
|
|
250
249
|
READ1(pq->nbits);
|
|
@@ -354,7 +353,7 @@ static void read_ProductLocalSearchQuantizer(
|
|
|
354
353
|
}
|
|
355
354
|
}
|
|
356
355
|
|
|
357
|
-
|
|
356
|
+
void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f) {
|
|
358
357
|
READ1(ivsc->qtype);
|
|
359
358
|
READ1(ivsc->rangestat);
|
|
360
359
|
READ1(ivsc->rangestat_arg);
|
|
@@ -375,7 +374,10 @@ static void read_HNSW(HNSW* hnsw, IOReader* f) {
|
|
|
375
374
|
READ1(hnsw->max_level);
|
|
376
375
|
READ1(hnsw->efConstruction);
|
|
377
376
|
READ1(hnsw->efSearch);
|
|
378
|
-
|
|
377
|
+
|
|
378
|
+
// deprecated field (upper_beam): presumably read and discarded by READ1_DUMMY below
|
|
379
|
+
// READ1(hnsw->upper_beam);
|
|
380
|
+
READ1_DUMMY(int)
|
|
379
381
|
}
|
|
380
382
|
|
|
381
383
|
static void read_NSG(NSG* nsg, IOReader* f) {
|
|
@@ -440,7 +442,7 @@ ProductQuantizer* read_ProductQuantizer(IOReader* reader) {
|
|
|
440
442
|
return pq;
|
|
441
443
|
}
|
|
442
444
|
|
|
443
|
-
|
|
445
|
+
void read_direct_map(DirectMap* dm, IOReader* f) {
|
|
444
446
|
char maintain_direct_map;
|
|
445
447
|
READ1(maintain_direct_map);
|
|
446
448
|
dm->type = (DirectMap::Type)maintain_direct_map;
|
|
@@ -456,10 +458,10 @@ static void read_direct_map(DirectMap* dm, IOReader* f) {
|
|
|
456
458
|
}
|
|
457
459
|
}
|
|
458
460
|
|
|
459
|
-
|
|
461
|
+
void read_ivf_header(
|
|
460
462
|
IndexIVF* ivf,
|
|
461
463
|
IOReader* f,
|
|
462
|
-
std::vector<std::vector<idx_t>>* ids
|
|
464
|
+
std::vector<std::vector<idx_t>>* ids) {
|
|
463
465
|
read_index_header(ivf, f);
|
|
464
466
|
READ1(ivf->nlist);
|
|
465
467
|
READ1(ivf->nprobe);
|
|
@@ -474,7 +476,7 @@ static void read_ivf_header(
|
|
|
474
476
|
}
|
|
475
477
|
|
|
476
478
|
// used for legacy formats
|
|
477
|
-
|
|
479
|
+
ArrayInvertedLists* set_array_invlist(
|
|
478
480
|
IndexIVF* ivf,
|
|
479
481
|
std::vector<std::vector<idx_t>>& ids) {
|
|
480
482
|
ArrayInvertedLists* ail =
|
|
@@ -531,7 +533,11 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
531
533
|
Index* idx = nullptr;
|
|
532
534
|
uint32_t h;
|
|
533
535
|
READ1(h);
|
|
534
|
-
if (h == fourcc("
|
|
536
|
+
if (h == fourcc("null")) {
|
|
537
|
+
// "null" denotes a missing index, e.g. an HNSW storage that was not written
|
|
538
|
+
return nullptr;
|
|
539
|
+
} else if (
|
|
540
|
+
h == fourcc("IxFI") || h == fourcc("IxF2") || h == fourcc("IxFl")) {
|
|
535
541
|
IndexFlat* idxf;
|
|
536
542
|
if (h == fourcc("IxFI")) {
|
|
537
543
|
idxf = new IndexFlatIP();
|
|
@@ -948,7 +954,7 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
948
954
|
idx = idxp;
|
|
949
955
|
} else if (
|
|
950
956
|
h == fourcc("IHNf") || h == fourcc("IHNp") || h == fourcc("IHNs") ||
|
|
951
|
-
h == fourcc("IHN2")) {
|
|
957
|
+
h == fourcc("IHN2") || h == fourcc("IHNc")) {
|
|
952
958
|
IndexHNSW* idxhnsw = nullptr;
|
|
953
959
|
if (h == fourcc("IHNf"))
|
|
954
960
|
idxhnsw = new IndexHNSWFlat();
|
|
@@ -958,11 +964,19 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
958
964
|
idxhnsw = new IndexHNSWSQ();
|
|
959
965
|
if (h == fourcc("IHN2"))
|
|
960
966
|
idxhnsw = new IndexHNSW2Level();
|
|
967
|
+
if (h == fourcc("IHNc"))
|
|
968
|
+
idxhnsw = new IndexHNSWCagra();
|
|
961
969
|
read_index_header(idxhnsw, f);
|
|
970
|
+
if (h == fourcc("IHNc")) {
|
|
971
|
+
READ1(idxhnsw->keep_max_size_level0);
|
|
972
|
+
auto idx_hnsw_cagra = dynamic_cast<IndexHNSWCagra*>(idxhnsw);
|
|
973
|
+
READ1(idx_hnsw_cagra->base_level_only);
|
|
974
|
+
READ1(idx_hnsw_cagra->num_base_level_search_entrypoints);
|
|
975
|
+
}
|
|
962
976
|
read_HNSW(&idxhnsw->hnsw, f);
|
|
963
977
|
idxhnsw->storage = read_index(f, io_flags);
|
|
964
|
-
idxhnsw->own_fields =
|
|
965
|
-
if (h == fourcc("IHNp")) {
|
|
978
|
+
idxhnsw->own_fields = idxhnsw->storage != nullptr;
|
|
979
|
+
if (h == fourcc("IHNp") && !(io_flags & IO_FLAG_PQ_SKIP_SDC_TABLE)) {
|
|
966
980
|
dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table();
|
|
967
981
|
}
|
|
968
982
|
idx = idxhnsw;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Utils for index_read
|
|
9
|
+
|
|
10
|
+
#ifndef FAISS_INDEX_READ_UTILS_H
|
|
11
|
+
#define FAISS_INDEX_READ_UTILS_H
|
|
12
|
+
|
|
13
|
+
#include <faiss/IndexIVF.h>
|
|
14
|
+
#include <faiss/impl/io.h>
|
|
15
|
+
|
|
16
|
+
#pragma once
|
|
17
|
+
|
|
18
|
+
namespace faiss {
|
|
19
|
+
struct ProductQuantizer;
|
|
20
|
+
struct ScalarQuantizer;
|
|
21
|
+
|
|
22
|
+
void read_index_header(Index* idx, IOReader* f);
|
|
23
|
+
void read_direct_map(DirectMap* dm, IOReader* f);
|
|
24
|
+
void read_ivf_header(
|
|
25
|
+
IndexIVF* ivf,
|
|
26
|
+
IOReader* f,
|
|
27
|
+
std::vector<std::vector<idx_t>>* ids = nullptr);
|
|
28
|
+
void read_InvertedLists(IndexIVF* ivf, IOReader* f, int io_flags);
|
|
29
|
+
ArrayInvertedLists* set_array_invlist(
|
|
30
|
+
IndexIVF* ivf,
|
|
31
|
+
std::vector<std::vector<idx_t>>& ids);
|
|
32
|
+
void read_ProductQuantizer(ProductQuantizer* pq, IOReader* f);
|
|
33
|
+
void read_ScalarQuantizer(ScalarQuantizer* ivsc, IOReader* f);
|
|
34
|
+
|
|
35
|
+
} // namespace faiss
|
|
36
|
+
|
|
37
|
+
#endif
|
|
@@ -5,8 +5,6 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
// -*- c++ -*-
|
|
9
|
-
|
|
10
8
|
#include <faiss/index_io.h>
|
|
11
9
|
|
|
12
10
|
#include <faiss/impl/io.h>
|
|
@@ -314,7 +312,11 @@ static void write_HNSW(const HNSW* hnsw, IOWriter* f) {
|
|
|
314
312
|
WRITE1(hnsw->max_level);
|
|
315
313
|
WRITE1(hnsw->efConstruction);
|
|
316
314
|
WRITE1(hnsw->efSearch);
|
|
317
|
-
|
|
315
|
+
|
|
316
|
+
// deprecated field (upper_beam): a constant 1 is written in its place
|
|
317
|
+
// WRITE1(hnsw->upper_beam);
|
|
318
|
+
constexpr int tmp_upper_beam = 1;
|
|
319
|
+
WRITE1(tmp_upper_beam);
|
|
318
320
|
}
|
|
319
321
|
|
|
320
322
|
static void write_NSG(const NSG* nsg, IOWriter* f) {
|
|
@@ -390,8 +392,12 @@ static void write_ivf_header(const IndexIVF* ivf, IOWriter* f) {
|
|
|
390
392
|
write_direct_map(&ivf->direct_map, f);
|
|
391
393
|
}
|
|
392
394
|
|
|
393
|
-
void write_index(const Index* idx, IOWriter* f) {
|
|
394
|
-
if (
|
|
395
|
+
void write_index(const Index* idx, IOWriter* f, int io_flags) {
|
|
396
|
+
if (idx == nullptr) {
|
|
397
|
+
// e.g. for a storage component of HNSW that is set to nullptr
|
|
398
|
+
uint32_t h = fourcc("null");
|
|
399
|
+
WRITE1(h);
|
|
400
|
+
} else if (const IndexFlat* idxf = dynamic_cast<const IndexFlat*>(idx)) {
|
|
395
401
|
uint32_t h =
|
|
396
402
|
fourcc(idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI"
|
|
397
403
|
: idxf->metric_type == METRIC_L2 ? "IxF2"
|
|
@@ -760,12 +766,24 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
|
760
766
|
: dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp")
|
|
761
767
|
: dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs")
|
|
762
768
|
: dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2")
|
|
769
|
+
: dynamic_cast<const IndexHNSWCagra*>(idx) ? fourcc("IHNc")
|
|
763
770
|
: 0;
|
|
764
771
|
FAISS_THROW_IF_NOT(h != 0);
|
|
765
772
|
WRITE1(h);
|
|
766
773
|
write_index_header(idxhnsw, f);
|
|
774
|
+
if (h == fourcc("IHNc")) {
|
|
775
|
+
WRITE1(idxhnsw->keep_max_size_level0);
|
|
776
|
+
auto idx_hnsw_cagra = dynamic_cast<const IndexHNSWCagra*>(idxhnsw);
|
|
777
|
+
WRITE1(idx_hnsw_cagra->base_level_only);
|
|
778
|
+
WRITE1(idx_hnsw_cagra->num_base_level_search_entrypoints);
|
|
779
|
+
}
|
|
767
780
|
write_HNSW(&idxhnsw->hnsw, f);
|
|
768
|
-
|
|
781
|
+
if (io_flags & IO_FLAG_SKIP_STORAGE) {
|
|
782
|
+
uint32_t n4 = fourcc("null");
|
|
783
|
+
WRITE1(n4);
|
|
784
|
+
} else {
|
|
785
|
+
write_index(idxhnsw->storage, f);
|
|
786
|
+
}
|
|
769
787
|
} else if (const IndexNSG* idxnsg = dynamic_cast<const IndexNSG*>(idx)) {
|
|
770
788
|
uint32_t h = dynamic_cast<const IndexNSGFlat*>(idx) ? fourcc("INSf")
|
|
771
789
|
: dynamic_cast<const IndexNSGPQ*>(idx) ? fourcc("INSp")
|
|
@@ -841,14 +859,14 @@ void write_index(const Index* idx, IOWriter* f) {
|
|
|
841
859
|
}
|
|
842
860
|
}
|
|
843
861
|
|
|
844
|
-
void write_index(const Index* idx, FILE* f) {
|
|
862
|
+
void write_index(const Index* idx, FILE* f, int io_flags) {
|
|
845
863
|
FileIOWriter writer(f);
|
|
846
|
-
write_index(idx, &writer);
|
|
864
|
+
write_index(idx, &writer, io_flags);
|
|
847
865
|
}
|
|
848
866
|
|
|
849
|
-
void write_index(const Index* idx, const char* fname) {
|
|
867
|
+
void write_index(const Index* idx, const char* fname, int io_flags) {
|
|
850
868
|
FileIOWriter writer(fname);
|
|
851
|
-
write_index(idx, &writer);
|
|
869
|
+
write_index(idx, &writer, io_flags);
|
|
852
870
|
}
|
|
853
871
|
|
|
854
872
|
void write_VectorTransform(const VectorTransform* vt, const char* fname) {
|
|
@@ -20,11 +20,11 @@ namespace faiss {
|
|
|
20
20
|
* IO functions
|
|
21
21
|
***********************************************************************/
|
|
22
22
|
|
|
23
|
-
int IOReader::
|
|
23
|
+
int IOReader::filedescriptor() {
|
|
24
24
|
FAISS_THROW_MSG("IOReader does not support memory mapping");
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
int IOWriter::
|
|
27
|
+
int IOWriter::filedescriptor() {
|
|
28
28
|
FAISS_THROW_MSG("IOWriter does not support memory mapping");
|
|
29
29
|
}
|
|
30
30
|
|
|
@@ -85,8 +85,12 @@ size_t FileIOReader::operator()(void* ptr, size_t size, size_t nitems) {
|
|
|
85
85
|
return fread(ptr, size, nitems, f);
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
int FileIOReader::
|
|
88
|
+
int FileIOReader::filedescriptor() {
|
|
89
|
+
#ifdef _AIX
|
|
90
|
+
return fileno(f);
|
|
91
|
+
#else
|
|
89
92
|
return ::fileno(f);
|
|
93
|
+
#endif
|
|
90
94
|
}
|
|
91
95
|
|
|
92
96
|
FileIOWriter::FileIOWriter(FILE* wf) : f(wf) {}
|
|
@@ -116,8 +120,12 @@ size_t FileIOWriter::operator()(const void* ptr, size_t size, size_t nitems) {
|
|
|
116
120
|
return fwrite(ptr, size, nitems, f);
|
|
117
121
|
}
|
|
118
122
|
|
|
119
|
-
int FileIOWriter::
|
|
123
|
+
int FileIOWriter::filedescriptor() {
|
|
124
|
+
#ifdef _AIX
|
|
125
|
+
return fileno(f);
|
|
126
|
+
#else
|
|
120
127
|
return ::fileno(f);
|
|
128
|
+
#endif
|
|
121
129
|
}
|
|
122
130
|
|
|
123
131
|
/***********************************************************************
|
|
@@ -259,7 +267,7 @@ std::string fourcc_inv_printable(uint32_t x) {
|
|
|
259
267
|
str += c;
|
|
260
268
|
} else {
|
|
261
269
|
char buf[10];
|
|
262
|
-
|
|
270
|
+
snprintf(buf, sizeof(buf), "\\x%02x", c);
|
|
263
271
|
str += buf;
|
|
264
272
|
}
|
|
265
273
|
}
|
|
@@ -32,7 +32,7 @@ struct IOReader {
|
|
|
32
32
|
virtual size_t operator()(void* ptr, size_t size, size_t nitems) = 0;
|
|
33
33
|
|
|
34
34
|
// return a file number that can be memory-mapped
|
|
35
|
-
virtual int
|
|
35
|
+
virtual int filedescriptor();
|
|
36
36
|
|
|
37
37
|
virtual ~IOReader() {}
|
|
38
38
|
};
|
|
@@ -45,7 +45,7 @@ struct IOWriter {
|
|
|
45
45
|
virtual size_t operator()(const void* ptr, size_t size, size_t nitems) = 0;
|
|
46
46
|
|
|
47
47
|
// return a file number that can be memory-mapped
|
|
48
|
-
virtual int
|
|
48
|
+
virtual int filedescriptor();
|
|
49
49
|
|
|
50
50
|
virtual ~IOWriter() noexcept(false) {}
|
|
51
51
|
};
|
|
@@ -73,7 +73,7 @@ struct FileIOReader : IOReader {
|
|
|
73
73
|
|
|
74
74
|
size_t operator()(void* ptr, size_t size, size_t nitems) override;
|
|
75
75
|
|
|
76
|
-
int
|
|
76
|
+
int filedescriptor() override;
|
|
77
77
|
};
|
|
78
78
|
|
|
79
79
|
struct FileIOWriter : IOWriter {
|
|
@@ -88,7 +88,7 @@ struct FileIOWriter : IOWriter {
|
|
|
88
88
|
|
|
89
89
|
size_t operator()(const void* ptr, size_t size, size_t nitems) override;
|
|
90
90
|
|
|
91
|
-
int
|
|
91
|
+
int filedescriptor() override;
|
|
92
92
|
};
|
|
93
93
|
|
|
94
94
|
/*******************************************************
|
|
@@ -127,6 +127,13 @@ inline int __builtin_clzll(uint64_t x) {
|
|
|
127
127
|
__pragma(float_control(precise, off, push))
|
|
128
128
|
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END __pragma(float_control(pop))
|
|
129
129
|
#elif defined(__clang__)
|
|
130
|
+
#if defined(__PPC__)
|
|
131
|
+
#define FAISS_PRAGMA_IMPRECISE_LOOP \
|
|
132
|
+
_Pragma("clang loop vectorize_width(4) interleave_count(8)")
|
|
133
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
|
|
134
|
+
_Pragma("float_control(precise, off, push)")
|
|
135
|
+
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END _Pragma("float_control(pop)")
|
|
136
|
+
#else
|
|
130
137
|
#define FAISS_PRAGMA_IMPRECISE_LOOP \
|
|
131
138
|
_Pragma("clang loop vectorize(enable) interleave(enable)")
|
|
132
139
|
|
|
@@ -144,6 +151,7 @@ inline int __builtin_clzll(uint64_t x) {
|
|
|
144
151
|
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
|
|
145
152
|
#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
|
|
146
153
|
#endif
|
|
154
|
+
#endif
|
|
147
155
|
#elif defined(__GNUC__)
|
|
148
156
|
// Unfortunately, GCC does not provide a macro that uniquely identifies it.
|
|
149
157
|
// So, we have to stick to __GNUC__, which is defined by MANY compilers.
|
|
@@ -165,3 +173,17 @@ inline int __builtin_clzll(uint64_t x) {
|
|
|
165
173
|
#endif
|
|
166
174
|
|
|
167
175
|
// clang-format on
|
|
176
|
+
|
|
177
|
+
/*******************************************************
|
|
178
|
+
* BIGENDIAN specific macros
|
|
179
|
+
*******************************************************/
|
|
180
|
+
#if !defined(_MSC_VER) && \
|
|
181
|
+
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
|
|
182
|
+
#define FAISS_BIG_ENDIAN
|
|
183
|
+
#endif
|
|
184
|
+
|
|
185
|
+
// Exchange the two low-order bytes of val; the result keeps only 16 bits.
// val is expanded twice, so avoid arguments with side effects.
#define Swap2Bytes(val) ((((val) << 8) & 0xFF00) | (((val) >> 8) & 0x00FF))
|
|
186
|
+
|
|
187
|
+
#define Swap4Bytes(val) \
|
|
188
|
+
((((val) >> 24) & 0x000000FF) | (((val) >> 8) & 0x0000FF00) | \
|
|
189
|
+
(((val) << 8) & 0x00FF0000) | (((val) << 24) & 0xFF000000))
|