faiss 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/faiss/index.cpp +25 -6
- data/ext/faiss/index_binary.cpp +17 -4
- data/ext/faiss/kmeans.cpp +6 -6
- data/lib/faiss/version.rb +1 -1
- data/vendor/faiss/faiss/AutoTune.cpp +2 -3
- data/vendor/faiss/faiss/AutoTune.h +1 -1
- data/vendor/faiss/faiss/Clustering.cpp +2 -2
- data/vendor/faiss/faiss/Clustering.h +2 -2
- data/vendor/faiss/faiss/IVFlib.cpp +1 -2
- data/vendor/faiss/faiss/IVFlib.h +1 -1
- data/vendor/faiss/faiss/Index.h +10 -10
- data/vendor/faiss/faiss/Index2Layer.cpp +1 -1
- data/vendor/faiss/faiss/Index2Layer.h +2 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.cpp +9 -4
- data/vendor/faiss/faiss/IndexAdditiveQuantizerFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexBinary.h +7 -7
- data/vendor/faiss/faiss/IndexBinaryFromFloat.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexBinaryHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +3 -3
- data/vendor/faiss/faiss/IndexBinaryHash.h +5 -5
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +7 -6
- data/vendor/faiss/faiss/IndexFastScan.cpp +125 -49
- data/vendor/faiss/faiss/IndexFastScan.h +107 -7
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -1
- data/vendor/faiss/faiss/IndexHNSW.h +1 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +14 -13
- data/vendor/faiss/faiss/IndexIDMap.h +6 -6
- data/vendor/faiss/faiss/IndexIVF.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVF.h +5 -5
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.cpp +9 -3
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizerFastScan.h +3 -1
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +176 -90
- data/vendor/faiss/faiss/IndexIVFFastScan.h +173 -18
- data/vendor/faiss/faiss/IndexIVFFlat.cpp +1 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.cpp +366 -0
- data/vendor/faiss/faiss/IndexIVFFlatPanorama.h +64 -0
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +3 -1
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -1
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +134 -2
- data/vendor/faiss/faiss/IndexIVFPQFastScan.h +7 -1
- data/vendor/faiss/faiss/IndexIVFRaBitQ.cpp +13 -6
- data/vendor/faiss/faiss/IndexIVFRaBitQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.cpp +650 -0
- data/vendor/faiss/faiss/IndexIVFRaBitQFastScan.h +216 -0
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +1 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.h +1 -1
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -1
- data/vendor/faiss/faiss/IndexNeuralNetCodec.h +1 -1
- data/vendor/faiss/faiss/IndexPQ.h +1 -1
- data/vendor/faiss/faiss/IndexPQFastScan.cpp +6 -2
- data/vendor/faiss/faiss/IndexPQFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexRaBitQ.cpp +13 -10
- data/vendor/faiss/faiss/IndexRaBitQ.h +7 -2
- data/vendor/faiss/faiss/IndexRaBitQFastScan.cpp +586 -0
- data/vendor/faiss/faiss/IndexRaBitQFastScan.h +149 -0
- data/vendor/faiss/faiss/IndexShards.cpp +1 -1
- data/vendor/faiss/faiss/MatrixStats.cpp +3 -3
- data/vendor/faiss/faiss/MetricType.h +1 -1
- data/vendor/faiss/faiss/VectorTransform.h +2 -2
- data/vendor/faiss/faiss/clone_index.cpp +3 -1
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndex.h +11 -11
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryCagra.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +1 -1
- data/vendor/faiss/faiss/gpu/GpuIndexCagra.h +10 -6
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIcmEncoder.cpp +7 -0
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +2 -2
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +1 -1
- data/vendor/faiss/faiss/impl/CodePacker.h +2 -2
- data/vendor/faiss/faiss/impl/DistanceComputer.h +3 -3
- data/vendor/faiss/faiss/impl/FastScanDistancePostProcessing.h +53 -0
- data/vendor/faiss/faiss/impl/HNSW.cpp +1 -1
- data/vendor/faiss/faiss/impl/HNSW.h +4 -4
- data/vendor/faiss/faiss/impl/IDSelector.cpp +2 -2
- data/vendor/faiss/faiss/impl/IDSelector.h +1 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/LookupTableScaler.h +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.cpp +1 -1
- data/vendor/faiss/faiss/impl/NNDescent.h +2 -2
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -1
- data/vendor/faiss/faiss/impl/PanoramaStats.cpp +33 -0
- data/vendor/faiss/faiss/impl/PanoramaStats.h +38 -0
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +5 -5
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ProductQuantizer-inl.h +2 -0
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/RaBitQUtils.cpp +246 -0
- data/vendor/faiss/faiss/impl/RaBitQUtils.h +153 -0
- data/vendor/faiss/faiss/impl/RaBitQuantizer.cpp +54 -158
- data/vendor/faiss/faiss/impl/RaBitQuantizer.h +2 -1
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ResultHandler.h +4 -4
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +1 -1
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +1 -1
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +7 -4
- data/vendor/faiss/faiss/impl/index_read.cpp +87 -3
- data/vendor/faiss/faiss/impl/index_write.cpp +73 -3
- data/vendor/faiss/faiss/impl/io.cpp +2 -2
- data/vendor/faiss/faiss/impl/io.h +4 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +1 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +1 -1
- data/vendor/faiss/faiss/impl/lattice_Zn.h +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.cpp +2 -2
- data/vendor/faiss/faiss/impl/mapped_io.h +4 -3
- data/vendor/faiss/faiss/impl/maybe_owned_vector.h +8 -1
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +30 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +14 -8
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +5 -6
- data/vendor/faiss/faiss/impl/simd_result_handlers.h +55 -11
- data/vendor/faiss/faiss/impl/zerocopy_io.h +1 -1
- data/vendor/faiss/faiss/index_factory.cpp +43 -1
- data/vendor/faiss/faiss/index_factory.h +1 -1
- data/vendor/faiss/faiss/index_io.h +1 -1
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +205 -0
- data/vendor/faiss/faiss/invlists/InvertedLists.h +62 -0
- data/vendor/faiss/faiss/utils/AlignedTable.h +1 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +2 -2
- data/vendor/faiss/faiss/utils/Heap.h +3 -3
- data/vendor/faiss/faiss/utils/NeuralNet.cpp +1 -1
- data/vendor/faiss/faiss/utils/NeuralNet.h +3 -3
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +2 -2
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +1 -1
- data/vendor/faiss/faiss/utils/distances.h +2 -2
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +3 -1
- data/vendor/faiss/faiss/utils/hamming-inl.h +2 -0
- data/vendor/faiss/faiss/utils/hamming.cpp +7 -6
- data/vendor/faiss/faiss/utils/hamming.h +1 -1
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +1 -2
- data/vendor/faiss/faiss/utils/partitioning.cpp +5 -5
- data/vendor/faiss/faiss/utils/partitioning.h +2 -2
- data/vendor/faiss/faiss/utils/rabitq_simd.h +222 -336
- data/vendor/faiss/faiss/utils/random.cpp +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_avx512.h +1 -1
- data/vendor/faiss/faiss/utils/simdlib_neon.h +2 -2
- data/vendor/faiss/faiss/utils/transpose/transpose-avx512-inl.h +1 -1
- data/vendor/faiss/faiss/utils/utils.cpp +5 -2
- data/vendor/faiss/faiss/utils/utils.h +2 -2
- metadata +12 -1
|
@@ -8,31 +8,20 @@
|
|
|
8
8
|
#include <faiss/impl/RaBitQuantizer.h>
|
|
9
9
|
|
|
10
10
|
#include <faiss/impl/FaissAssert.h>
|
|
11
|
+
#include <faiss/impl/RaBitQUtils.h>
|
|
11
12
|
#include <faiss/utils/distances.h>
|
|
12
13
|
#include <faiss/utils/rabitq_simd.h>
|
|
13
14
|
#include <algorithm>
|
|
14
15
|
#include <cmath>
|
|
15
16
|
#include <cstring>
|
|
16
|
-
#include <limits>
|
|
17
17
|
#include <memory>
|
|
18
18
|
#include <vector>
|
|
19
19
|
|
|
20
20
|
namespace faiss {
|
|
21
21
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
float dp_multiplier = 0;
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
struct QueryFactorsData {
|
|
29
|
-
float c1 = 0;
|
|
30
|
-
float c2 = 0;
|
|
31
|
-
float c34 = 0;
|
|
32
|
-
|
|
33
|
-
float qr_to_c_L2sqr = 0;
|
|
34
|
-
float qr_norm_L2sqr = 0;
|
|
35
|
-
};
|
|
22
|
+
// Import shared utilities from RaBitQUtils
|
|
23
|
+
using rabitq_utils::FactorsData;
|
|
24
|
+
using rabitq_utils::QueryFactorsData;
|
|
36
25
|
|
|
37
26
|
static size_t get_code_size(const size_t d) {
|
|
38
27
|
return (d + 7) / 8 + sizeof(FactorsData);
|
|
@@ -65,19 +54,9 @@ void RaBitQuantizer::compute_codes_core(
|
|
|
65
54
|
return;
|
|
66
55
|
}
|
|
67
56
|
|
|
68
|
-
// compute some helper constants
|
|
69
|
-
const float inv_d_sqrt = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
|
|
70
|
-
|
|
71
57
|
// compute codes
|
|
72
58
|
#pragma omp parallel for if (n > 1000)
|
|
73
59
|
for (int64_t i = 0; i < n; i++) {
|
|
74
|
-
// ||or - c||^2
|
|
75
|
-
float norm_L2sqr = 0;
|
|
76
|
-
// ||or||^2, which is equal to ||P(or)||^2 and ||P^(-1)(or)||^2
|
|
77
|
-
float or_L2sqr = 0;
|
|
78
|
-
// dot product
|
|
79
|
-
float dp_oO = 0;
|
|
80
|
-
|
|
81
60
|
// the code
|
|
82
61
|
uint8_t* code = codes + i * code_size;
|
|
83
62
|
FactorsData* fac = reinterpret_cast<FactorsData*>(code + (d + 7) / 8);
|
|
@@ -87,46 +66,25 @@ void RaBitQuantizer::compute_codes_core(
|
|
|
87
66
|
memset(code, 0, code_size);
|
|
88
67
|
}
|
|
89
68
|
|
|
90
|
-
|
|
91
|
-
const float or_minus_c = x[i * d + j] -
|
|
92
|
-
((centroid_in == nullptr) ? 0 : centroid_in[j]);
|
|
93
|
-
norm_L2sqr += or_minus_c * or_minus_c;
|
|
94
|
-
or_L2sqr += x[i * d + j] * x[i * d + j];
|
|
69
|
+
const float* x_row = x + i * d;
|
|
95
70
|
|
|
96
|
-
|
|
71
|
+
// Use shared utilities for computing factors
|
|
72
|
+
*fac = rabitq_utils::compute_vector_factors(
|
|
73
|
+
x_row, d, centroid_in, metric_type);
|
|
97
74
|
|
|
98
|
-
|
|
75
|
+
// Pack bits into standard RaBitQ format
|
|
76
|
+
for (size_t j = 0; j < d; j++) {
|
|
77
|
+
const float x_val = x_row[j];
|
|
78
|
+
const float centroid_val =
|
|
79
|
+
(centroid_in == nullptr) ? 0.0f : centroid_in[j];
|
|
80
|
+
const float or_minus_c = x_val - centroid_val;
|
|
81
|
+
const bool xb = (or_minus_c > 0.0f);
|
|
99
82
|
|
|
100
83
|
// store the output data
|
|
101
|
-
if (code != nullptr) {
|
|
102
|
-
|
|
103
|
-
// enable a particular bit
|
|
104
|
-
code[j / 8] |= (1 << (j % 8));
|
|
105
|
-
}
|
|
84
|
+
if (code != nullptr && xb) {
|
|
85
|
+
rabitq_utils::set_bit_standard(code, j);
|
|
106
86
|
}
|
|
107
87
|
}
|
|
108
|
-
|
|
109
|
-
// compute factors
|
|
110
|
-
|
|
111
|
-
// compute the inverse norm
|
|
112
|
-
const float inv_norm_L2 =
|
|
113
|
-
(std::abs(norm_L2sqr) < std::numeric_limits<float>::epsilon())
|
|
114
|
-
? 1.0f
|
|
115
|
-
: (1.0f / std::sqrt(norm_L2sqr));
|
|
116
|
-
dp_oO *= inv_norm_L2;
|
|
117
|
-
dp_oO *= inv_d_sqrt;
|
|
118
|
-
|
|
119
|
-
const float inv_dp_oO =
|
|
120
|
-
(std::abs(dp_oO) < std::numeric_limits<float>::epsilon())
|
|
121
|
-
? 1.0f
|
|
122
|
-
: (1.0f / dp_oO);
|
|
123
|
-
|
|
124
|
-
fac->or_minus_c_l2sqr = norm_L2sqr;
|
|
125
|
-
if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
|
|
126
|
-
fac->or_minus_c_l2sqr -= or_L2sqr;
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
fac->dp_multiplier = inv_dp_oO * std::sqrt(norm_L2sqr);
|
|
130
88
|
}
|
|
131
89
|
}
|
|
132
90
|
|
|
@@ -310,6 +268,7 @@ struct RaBitDistanceComputerQ : RaBitDistanceComputer {
|
|
|
310
268
|
|
|
311
269
|
// the number of bits for SQ quantization of the query (qb > 0)
|
|
312
270
|
uint8_t qb = 8;
|
|
271
|
+
bool centered = false;
|
|
313
272
|
// the smallest value divisible by 8 that is not smaller than dim
|
|
314
273
|
size_t popcount_aligned_dim = 0;
|
|
315
274
|
|
|
@@ -329,57 +288,35 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
|
|
|
329
288
|
metric_type == MetricType::METRIC_INNER_PRODUCT));
|
|
330
289
|
|
|
331
290
|
// split the code into parts
|
|
291
|
+
size_t size = (d + 7) / 8;
|
|
332
292
|
const uint8_t* binary_data = code;
|
|
333
|
-
const FactorsData* fac =
|
|
334
|
-
reinterpret_cast<const FactorsData*>(code + (d + 7) / 8);
|
|
293
|
+
const FactorsData* fac = reinterpret_cast<const FactorsData*>(code + size);
|
|
335
294
|
|
|
336
|
-
//
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
// }
|
|
349
|
-
|
|
350
|
-
// this is the scheme for popcount
|
|
351
|
-
const size_t di_8b = (d + 7) / 8;
|
|
352
|
-
const size_t di_64b = (di_8b / 8) * 8;
|
|
353
|
-
|
|
354
|
-
// Use the optimized popcount function from rabitq_simd.h
|
|
355
|
-
float dot_qo =
|
|
356
|
-
rabitq_dp_popcnt(rearranged_rotated_qq.data(), binary_data, d, qb);
|
|
295
|
+
// this is ||or - c||^2 - (IP ? ||or||^2 : 0)
|
|
296
|
+
float final_dot = 0;
|
|
297
|
+
if (centered) {
|
|
298
|
+
int64_t int_dot = ((1 << qb) - 1) * d;
|
|
299
|
+
int_dot -= 2 *
|
|
300
|
+
rabitq::bitwise_xor_dot_product(
|
|
301
|
+
rearranged_rotated_qq.data(), binary_data, size, qb);
|
|
302
|
+
final_dot += int_dot * query_fac.int_dot_scale;
|
|
303
|
+
} else {
|
|
304
|
+
// See RaBitDistanceComputerNotQ::distance_to_code() for baseline code.
|
|
305
|
+
auto dot_qo = rabitq::bitwise_and_dot_product(
|
|
306
|
+
rearranged_rotated_qq.data(), binary_data, size, qb);
|
|
357
307
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
uint64_t sum_q = 0;
|
|
361
|
-
{
|
|
308
|
+
// It was a deliberate decision (after discussion) not to pre-cache
|
|
309
|
+
// the sum of all bits, just in order to reduce the overhead per vector.
|
|
362
310
|
// process 64-bit popcounts
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
//
|
|
369
|
-
|
|
370
|
-
const auto yv = *(binary_data + i);
|
|
371
|
-
sum_q += __builtin_popcount(yv);
|
|
372
|
-
}
|
|
311
|
+
auto sum_q = rabitq::popcount(binary_data, size);
|
|
312
|
+
// dot-product itself
|
|
313
|
+
final_dot += query_fac.c1 * dot_qo;
|
|
314
|
+
// normalizer coefficients
|
|
315
|
+
final_dot += query_fac.c2 * sum_q;
|
|
316
|
+
// normalizer coefficients
|
|
317
|
+
final_dot -= query_fac.c34;
|
|
373
318
|
}
|
|
374
319
|
|
|
375
|
-
float final_dot = 0;
|
|
376
|
-
// dot-product itself
|
|
377
|
-
final_dot += query_fac.c1 * dot_qo;
|
|
378
|
-
// normalizer coefficients
|
|
379
|
-
final_dot += query_fac.c2 * sum_q;
|
|
380
|
-
// normalizer coefficients
|
|
381
|
-
final_dot -= query_fac.c34;
|
|
382
|
-
|
|
383
320
|
// this is ||or - c||^2 - (IP ? ||or||^2 : 0)
|
|
384
321
|
const float or_c_l2sqr = fac->or_minus_c_l2sqr;
|
|
385
322
|
|
|
@@ -402,57 +339,23 @@ float RaBitDistanceComputerQ::distance_to_code(const uint8_t* code) {
|
|
|
402
339
|
}
|
|
403
340
|
}
|
|
404
341
|
|
|
342
|
+
// Use shared constant from RaBitQUtils
|
|
343
|
+
using rabitq_utils::Z_MAX_BY_QB;
|
|
344
|
+
|
|
405
345
|
void RaBitDistanceComputerQ::set_query(const float* x) {
|
|
406
346
|
FAISS_ASSERT(x != nullptr);
|
|
407
347
|
FAISS_ASSERT(
|
|
408
348
|
(metric_type == MetricType::METRIC_L2 ||
|
|
409
349
|
metric_type == MetricType::METRIC_INNER_PRODUCT));
|
|
350
|
+
FAISS_THROW_IF_NOT(qb <= 8);
|
|
351
|
+
FAISS_THROW_IF_NOT(qb > 0);
|
|
410
352
|
|
|
411
|
-
//
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
query_fac.qr_to_c_L2sqr = fvec_norm_L2sqr(x, d);
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
// allocate space
|
|
419
|
-
rotated_qq.resize(d);
|
|
420
|
-
|
|
421
|
-
// rotate the query
|
|
422
|
-
std::vector<float> rotated_q(d);
|
|
423
|
-
for (size_t i = 0; i < d; i++) {
|
|
424
|
-
rotated_q[i] = x[i] - ((centroid == nullptr) ? 0 : centroid[i]);
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
// compute some numbers
|
|
428
|
-
const float inv_d = (d == 0) ? 1.0f : (1.0f / std::sqrt((float)d));
|
|
429
|
-
|
|
430
|
-
// quantize the query. compute min and max
|
|
431
|
-
float v_min = std::numeric_limits<float>::max();
|
|
432
|
-
float v_max = std::numeric_limits<float>::lowest();
|
|
433
|
-
for (size_t i = 0; i < d; i++) {
|
|
434
|
-
const float v_q = rotated_q[i];
|
|
435
|
-
v_min = std::min(v_min, v_q);
|
|
436
|
-
v_max = std::max(v_max, v_q);
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
const float pow_2_qb = 1 << qb;
|
|
440
|
-
|
|
441
|
-
const float delta = (v_max - v_min) / (pow_2_qb - 1);
|
|
442
|
-
const float inv_delta = 1.0f / delta;
|
|
443
|
-
|
|
444
|
-
size_t sum_qq = 0;
|
|
445
|
-
for (int32_t i = 0; i < d; i++) {
|
|
446
|
-
const float v_q = rotated_q[i];
|
|
447
|
-
|
|
448
|
-
// a default non-randomized SQ
|
|
449
|
-
const int v_qq = std::round((v_q - v_min) * inv_delta);
|
|
450
|
-
|
|
451
|
-
rotated_qq[i] = std::min(255, std::max(0, v_qq));
|
|
452
|
-
sum_qq += v_qq;
|
|
453
|
-
}
|
|
353
|
+
// Use shared utilities for core query factor computation
|
|
354
|
+
std::vector<float> rotated_q;
|
|
355
|
+
query_fac = rabitq_utils::compute_query_factors(
|
|
356
|
+
x, d, centroid, qb, centered, metric_type, rotated_q, rotated_qq);
|
|
454
357
|
|
|
455
|
-
//
|
|
358
|
+
// Rearrange the query vector for SIMD operations (RaBitQuantizer-specific)
|
|
456
359
|
popcount_aligned_dim = ((d + 7) / 8) * 8;
|
|
457
360
|
size_t offset = (d + 7) / 8;
|
|
458
361
|
|
|
@@ -466,20 +369,12 @@ void RaBitDistanceComputerQ::set_query(const float* x) {
|
|
|
466
369
|
bit ? (1 << (idim % 8)) : 0;
|
|
467
370
|
}
|
|
468
371
|
}
|
|
469
|
-
|
|
470
|
-
query_fac.c1 = 2 * delta * inv_d;
|
|
471
|
-
query_fac.c2 = 2 * v_min * inv_d;
|
|
472
|
-
query_fac.c34 = inv_d * (delta * sum_qq + d * v_min);
|
|
473
|
-
|
|
474
|
-
if (metric_type == MetricType::METRIC_INNER_PRODUCT) {
|
|
475
|
-
// precompute if needed
|
|
476
|
-
query_fac.qr_norm_L2sqr = fvec_norm_L2sqr(x, d);
|
|
477
|
-
}
|
|
478
372
|
}
|
|
479
373
|
|
|
480
374
|
FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
|
|
481
375
|
uint8_t qb,
|
|
482
|
-
const float* centroid_in
|
|
376
|
+
const float* centroid_in,
|
|
377
|
+
bool centered) const {
|
|
483
378
|
if (qb == 0) {
|
|
484
379
|
auto dc = std::make_unique<RaBitDistanceComputerNotQ>();
|
|
485
380
|
dc->metric_type = metric_type;
|
|
@@ -493,6 +388,7 @@ FlatCodesDistanceComputer* RaBitQuantizer::get_distance_computer(
|
|
|
493
388
|
dc->d = d;
|
|
494
389
|
dc->centroid = centroid_in;
|
|
495
390
|
dc->qb = qb;
|
|
391
|
+
dc->centered = centered;
|
|
496
392
|
|
|
497
393
|
return dc.release();
|
|
498
394
|
}
|
|
@@ -72,7 +72,8 @@ struct RaBitQuantizer : Quantizer {
|
|
|
72
72
|
// specify qb > 0 to have SQ qb-bits query
|
|
73
73
|
FlatCodesDistanceComputer* get_distance_computer(
|
|
74
74
|
uint8_t qb,
|
|
75
|
-
const float* centroid_in = nullptr
|
|
75
|
+
const float* centroid_in = nullptr,
|
|
76
|
+
bool centered = false) const;
|
|
76
77
|
};
|
|
77
78
|
|
|
78
79
|
} // namespace faiss
|
|
@@ -49,7 +49,7 @@ struct ResidualQuantizer : AdditiveQuantizer {
|
|
|
49
49
|
* first element of the beam (faster but less accurate) */
|
|
50
50
|
static const int Train_top_beam = 1024;
|
|
51
51
|
|
|
52
|
-
/** set this bit to *not*
|
|
52
|
+
/** set this bit to *not* automatically compute the codebook tables
|
|
53
53
|
* after training */
|
|
54
54
|
static const int Skip_codebook_tables = 2048;
|
|
55
55
|
|
|
@@ -26,11 +26,11 @@ namespace faiss {
|
|
|
26
26
|
* The classes below are intended to be used as template arguments
|
|
27
27
|
* they handle results for batches of queries (size nq).
|
|
28
28
|
* They can be called in two ways:
|
|
29
|
-
* - by
|
|
29
|
+
* - by instantiating a SingleResultHandler that tracks results for a single
|
|
30
30
|
* query
|
|
31
31
|
* - with begin_multiple/add_results/end_multiple calls where a whole block of
|
|
32
32
|
* results is submitted
|
|
33
|
-
* All classes are templated on C which to define
|
|
33
|
+
* All classes are templated on C, which defines whether the min or the max of
|
|
34
34
|
* results is to be kept, and on sel, so that the codepaths for with / without
|
|
35
35
|
* selector can be separated at compile time.
|
|
36
36
|
*****************************************************************/
|
|
@@ -306,7 +306,7 @@ struct HeapBlockResultHandler : TopkBlockResultHandler<C, use_sel> {
|
|
|
306
306
|
*
|
|
307
307
|
* A reservoir is a result array of size capacity > n (number of requested
|
|
308
308
|
* results) all results below a threshold are stored in an arbitrary order.
|
|
309
|
-
*When the capacity is reached, a new threshold is chosen by
|
|
309
|
+
*When the capacity is reached, a new threshold is chosen by partitioning
|
|
310
310
|
*the distance array.
|
|
311
311
|
*****************************************************************/
|
|
312
312
|
|
|
@@ -572,7 +572,7 @@ struct RangeSearchBlockResultHandler : BlockResultHandler<C, use_sel> {
|
|
|
572
572
|
RangeSearchPartialResult* pres;
|
|
573
573
|
// there is one RangeSearchPartialResult structure per j0
|
|
574
574
|
// (= block of columns of the large distance matrix)
|
|
575
|
-
// it is a bit tricky to find the
|
|
575
|
+
// it is a bit tricky to find the proper PartialResult structure
|
|
576
576
|
// because the inner loop is on db not on queries.
|
|
577
577
|
|
|
578
578
|
if (pr < j0s.size() && j0 == j0s[pr]) {
|
|
@@ -321,7 +321,7 @@ struct Codec6bit {
|
|
|
321
321
|
static FAISS_ALWAYS_INLINE __m256
|
|
322
322
|
decode_8_components(const uint8_t* code, int i) {
|
|
323
323
|
// // Faster code for Intel CPUs or AMD Zen3+, just keeping it here
|
|
324
|
-
// // for the reference, maybe, it becomes used
|
|
324
|
+
// // for reference; maybe it will be used one day.
|
|
325
325
|
// const uint16_t* data16 = (const uint16_t*)(code + (i >> 2) * 3);
|
|
326
326
|
// const uint32_t* data32 = (const uint32_t*)data16;
|
|
327
327
|
// const uint64_t val = *data32 + ((uint64_t)data16[2] << 32);
|
|
@@ -40,7 +40,7 @@ struct ScalarQuantizer : Quantizer {
|
|
|
40
40
|
QuantizerType qtype = QT_8bit;
|
|
41
41
|
|
|
42
42
|
/** The uniform encoder can estimate the range of representable
|
|
43
|
-
* values of the
|
|
43
|
+
* values of the uniform encoder using different statistics. Here
|
|
44
44
|
* rs = rangestat_arg */
|
|
45
45
|
|
|
46
46
|
// rangestat_arg.
|
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
* LICENSE file in the root directory of this source tree.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
+
#pragma once
|
|
9
|
+
|
|
8
10
|
#include <faiss/impl/FaissAssert.h>
|
|
9
11
|
#include <exception>
|
|
10
12
|
#include <iostream>
|
|
@@ -75,10 +77,11 @@ void ThreadedIndex<IndexT>::addIndex(IndexT* index) {
|
|
|
75
77
|
}
|
|
76
78
|
}
|
|
77
79
|
|
|
78
|
-
indices_.emplace_back(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
indices_.emplace_back(
|
|
81
|
+
std::make_pair(
|
|
82
|
+
index,
|
|
83
|
+
std::unique_ptr<WorkerThread>(
|
|
84
|
+
isThreaded_ ? new WorkerThread : nullptr)));
|
|
82
85
|
|
|
83
86
|
onAfterAddIndex(index);
|
|
84
87
|
}
|
|
@@ -29,11 +29,13 @@
|
|
|
29
29
|
#include <faiss/IndexIVFAdditiveQuantizer.h>
|
|
30
30
|
#include <faiss/IndexIVFAdditiveQuantizerFastScan.h>
|
|
31
31
|
#include <faiss/IndexIVFFlat.h>
|
|
32
|
+
#include <faiss/IndexIVFFlatPanorama.h>
|
|
32
33
|
#include <faiss/IndexIVFIndependentQuantizer.h>
|
|
33
34
|
#include <faiss/IndexIVFPQ.h>
|
|
34
35
|
#include <faiss/IndexIVFPQFastScan.h>
|
|
35
36
|
#include <faiss/IndexIVFPQR.h>
|
|
36
37
|
#include <faiss/IndexIVFRaBitQ.h>
|
|
38
|
+
#include <faiss/IndexIVFRaBitQFastScan.h>
|
|
37
39
|
#include <faiss/IndexIVFSpectralHash.h>
|
|
38
40
|
#include <faiss/IndexLSH.h>
|
|
39
41
|
#include <faiss/IndexLattice.h>
|
|
@@ -43,6 +45,7 @@
|
|
|
43
45
|
#include <faiss/IndexPQFastScan.h>
|
|
44
46
|
#include <faiss/IndexPreTransform.h>
|
|
45
47
|
#include <faiss/IndexRaBitQ.h>
|
|
48
|
+
#include <faiss/IndexRaBitQFastScan.h>
|
|
46
49
|
#include <faiss/IndexRefine.h>
|
|
47
50
|
#include <faiss/IndexRowwiseMinMax.h>
|
|
48
51
|
#include <faiss/IndexScalarQuantizer.h>
|
|
@@ -68,9 +71,10 @@ namespace faiss {
|
|
|
68
71
|
**************************************************************/
|
|
69
72
|
|
|
70
73
|
// This is a baseline functionality for reading mmapped and zerocopied vector.
|
|
71
|
-
// * if `beforeknown_size` is defined, then a size of the vector won't be
|
|
74
|
+
// * if `beforeknown_size` is defined, then a size of the vector won't be
|
|
75
|
+
// read.
|
|
72
76
|
// * if `size_multiplier` is defined, then a size will be multiplied by it.
|
|
73
|
-
// * returns true is the case was handled;
|
|
77
|
+
// * returns true if the case was handled; otherwise, false
|
|
74
78
|
template <typename VectorT>
|
|
75
79
|
bool read_vector_base(
|
|
76
80
|
VectorT& target,
|
|
@@ -181,7 +185,7 @@ void read_vector(VectorT& target, IOReader* f) {
|
|
|
181
185
|
// a replacement for READXBVECTOR
|
|
182
186
|
template <typename VectorT>
|
|
183
187
|
void read_xb_vector(VectorT& target, IOReader* f) {
|
|
184
|
-
// size is not known beforehand,
|
|
188
|
+
// size is not known beforehand, multiply the size 4x
|
|
185
189
|
if (read_vector_base<VectorT>(target, f, std::nullopt, 4)) {
|
|
186
190
|
return;
|
|
187
191
|
}
|
|
@@ -325,6 +329,34 @@ InvertedLists* read_InvertedLists(IOReader* f, int io_flags) {
|
|
|
325
329
|
"read_InvertedLists:"
|
|
326
330
|
" WARN! inverted lists not stored with IVF object\n");
|
|
327
331
|
return nullptr;
|
|
332
|
+
} else if (h == fourcc("ilpn") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
333
|
+
size_t nlist, code_size, n_levels;
|
|
334
|
+
READ1(nlist);
|
|
335
|
+
READ1(code_size);
|
|
336
|
+
READ1(n_levels);
|
|
337
|
+
auto ailp = new ArrayInvertedListsPanorama(nlist, code_size, n_levels);
|
|
338
|
+
std::vector<size_t> sizes(nlist);
|
|
339
|
+
read_ArrayInvertedLists_sizes(f, sizes);
|
|
340
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
341
|
+
ailp->ids[i].resize(sizes[i]);
|
|
342
|
+
size_t num_elems =
|
|
343
|
+
((sizes[i] + ArrayInvertedListsPanorama::kBatchSize - 1) /
|
|
344
|
+
ArrayInvertedListsPanorama::kBatchSize) *
|
|
345
|
+
ArrayInvertedListsPanorama::kBatchSize;
|
|
346
|
+
ailp->codes[i].resize(num_elems * code_size);
|
|
347
|
+
ailp->cum_sums[i].resize(num_elems * (n_levels + 1));
|
|
348
|
+
}
|
|
349
|
+
for (size_t i = 0; i < nlist; i++) {
|
|
350
|
+
size_t n = sizes[i];
|
|
351
|
+
if (n > 0) {
|
|
352
|
+
read_vector_with_known_size(
|
|
353
|
+
ailp->codes[i], f, ailp->codes[i].size());
|
|
354
|
+
read_vector_with_known_size(ailp->ids[i], f, n);
|
|
355
|
+
read_vector_with_known_size(
|
|
356
|
+
ailp->cum_sums[i], f, ailp->cum_sums[i].size());
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
return ailp;
|
|
328
360
|
} else if (h == fourcc("ilar") && !(io_flags & IO_FLAG_SKIP_IVF_DATA)) {
|
|
329
361
|
auto ails = new ArrayInvertedLists(0, 0);
|
|
330
362
|
READ1(ails->nlist);
|
|
@@ -927,6 +959,13 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
927
959
|
}
|
|
928
960
|
read_InvertedLists(ivfl, f, io_flags);
|
|
929
961
|
idx = ivfl;
|
|
962
|
+
} else if (h == fourcc("IwPn")) {
|
|
963
|
+
IndexIVFFlatPanorama* ivfp = new IndexIVFFlatPanorama();
|
|
964
|
+
read_ivf_header(ivfp, f);
|
|
965
|
+
ivfp->code_size = ivfp->d * sizeof(float);
|
|
966
|
+
READ1(ivfp->n_levels);
|
|
967
|
+
read_InvertedLists(ivfp, f, io_flags);
|
|
968
|
+
idx = ivfp;
|
|
930
969
|
} else if (h == fourcc("IwFl")) {
|
|
931
970
|
IndexIVFFlat* ivfl = new IndexIVFFlat();
|
|
932
971
|
read_ivf_header(ivfl, f);
|
|
@@ -1224,6 +1263,27 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
1224
1263
|
imm->own_fields = true;
|
|
1225
1264
|
|
|
1226
1265
|
idx = imm;
|
|
1266
|
+
} else if (h == fourcc("Irfs")) {
|
|
1267
|
+
IndexRaBitQFastScan* idxqfs = new IndexRaBitQFastScan();
|
|
1268
|
+
read_index_header(idxqfs, f);
|
|
1269
|
+
read_RaBitQuantizer(&idxqfs->rabitq, f);
|
|
1270
|
+
READVECTOR(idxqfs->center);
|
|
1271
|
+
READ1(idxqfs->qb);
|
|
1272
|
+
READVECTOR(idxqfs->factors_storage);
|
|
1273
|
+
READ1(idxqfs->bbs);
|
|
1274
|
+
READ1(idxqfs->ntotal2);
|
|
1275
|
+
READ1(idxqfs->M2);
|
|
1276
|
+
READ1(idxqfs->code_size);
|
|
1277
|
+
|
|
1278
|
+
// Need to initialize the FastScan base class fields
|
|
1279
|
+
const size_t M_fastscan = (idxqfs->d + 3) / 4;
|
|
1280
|
+
constexpr size_t nbits_fastscan = 4;
|
|
1281
|
+
idxqfs->M = M_fastscan;
|
|
1282
|
+
idxqfs->nbits = nbits_fastscan;
|
|
1283
|
+
idxqfs->ksub = (1 << nbits_fastscan);
|
|
1284
|
+
|
|
1285
|
+
READVECTOR(idxqfs->codes);
|
|
1286
|
+
idx = idxqfs;
|
|
1227
1287
|
} else if (h == fourcc("Ixrq")) {
|
|
1228
1288
|
IndexRaBitQ* idxq = new IndexRaBitQ();
|
|
1229
1289
|
read_index_header(idxq, f);
|
|
@@ -1242,6 +1302,30 @@ Index* read_index(IOReader* f, int io_flags) {
|
|
|
1242
1302
|
READ1(ivrq->qb);
|
|
1243
1303
|
read_InvertedLists(ivrq, f, io_flags);
|
|
1244
1304
|
idx = ivrq;
|
|
1305
|
+
} else if (h == fourcc("Iwrf")) {
|
|
1306
|
+
IndexIVFRaBitQFastScan* ivrqfs = new IndexIVFRaBitQFastScan();
|
|
1307
|
+
read_ivf_header(ivrqfs, f);
|
|
1308
|
+
read_RaBitQuantizer(&ivrqfs->rabitq, f);
|
|
1309
|
+
READ1(ivrqfs->by_residual);
|
|
1310
|
+
READ1(ivrqfs->code_size);
|
|
1311
|
+
READ1(ivrqfs->bbs);
|
|
1312
|
+
READ1(ivrqfs->qbs2);
|
|
1313
|
+
READ1(ivrqfs->M2);
|
|
1314
|
+
READ1(ivrqfs->implem);
|
|
1315
|
+
READ1(ivrqfs->qb);
|
|
1316
|
+
READ1(ivrqfs->centered);
|
|
1317
|
+
READVECTOR(ivrqfs->factors_storage);
|
|
1318
|
+
|
|
1319
|
+
// Initialize FastScan base class fields
|
|
1320
|
+
const size_t M_fastscan = (ivrqfs->d + 3) / 4;
|
|
1321
|
+
constexpr size_t nbits_fastscan = 4;
|
|
1322
|
+
ivrqfs->M = M_fastscan;
|
|
1323
|
+
ivrqfs->nbits = nbits_fastscan;
|
|
1324
|
+
ivrqfs->ksub = (1 << nbits_fastscan);
|
|
1325
|
+
|
|
1326
|
+
read_InvertedLists(ivrqfs, f, io_flags);
|
|
1327
|
+
ivrqfs->init_code_packer();
|
|
1328
|
+
idx = ivrqfs;
|
|
1245
1329
|
} else {
|
|
1246
1330
|
FAISS_THROW_FMT(
|
|
1247
1331
|
"Index type 0x%08x (\"%s\") not recognized",
|