faiss 0.2.6 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/faiss/extconf.rb +1 -1
- data/lib/faiss/version.rb +1 -1
- data/lib/faiss.rb +2 -2
- data/vendor/faiss/faiss/AutoTune.cpp +15 -4
- data/vendor/faiss/faiss/AutoTune.h +0 -1
- data/vendor/faiss/faiss/Clustering.cpp +1 -5
- data/vendor/faiss/faiss/Clustering.h +0 -2
- data/vendor/faiss/faiss/IVFlib.h +0 -2
- data/vendor/faiss/faiss/Index.h +1 -2
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.cpp +17 -3
- data/vendor/faiss/faiss/IndexAdditiveQuantizer.h +10 -1
- data/vendor/faiss/faiss/IndexBinary.h +0 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.cpp +2 -1
- data/vendor/faiss/faiss/IndexBinaryFlat.h +4 -0
- data/vendor/faiss/faiss/IndexBinaryHash.cpp +1 -3
- data/vendor/faiss/faiss/IndexBinaryIVF.cpp +273 -48
- data/vendor/faiss/faiss/IndexBinaryIVF.h +18 -11
- data/vendor/faiss/faiss/IndexFastScan.cpp +13 -10
- data/vendor/faiss/faiss/IndexFastScan.h +5 -1
- data/vendor/faiss/faiss/IndexFlat.cpp +16 -3
- data/vendor/faiss/faiss/IndexFlat.h +1 -1
- data/vendor/faiss/faiss/IndexFlatCodes.cpp +5 -0
- data/vendor/faiss/faiss/IndexFlatCodes.h +7 -2
- data/vendor/faiss/faiss/IndexHNSW.cpp +3 -6
- data/vendor/faiss/faiss/IndexHNSW.h +0 -1
- data/vendor/faiss/faiss/IndexIDMap.cpp +4 -4
- data/vendor/faiss/faiss/IndexIDMap.h +0 -2
- data/vendor/faiss/faiss/IndexIVF.cpp +155 -129
- data/vendor/faiss/faiss/IndexIVF.h +121 -61
- data/vendor/faiss/faiss/IndexIVFAdditiveQuantizer.cpp +2 -2
- data/vendor/faiss/faiss/IndexIVFFastScan.cpp +12 -11
- data/vendor/faiss/faiss/IndexIVFFastScan.h +6 -1
- data/vendor/faiss/faiss/IndexIVFPQ.cpp +221 -165
- data/vendor/faiss/faiss/IndexIVFPQ.h +1 -0
- data/vendor/faiss/faiss/IndexIVFPQFastScan.cpp +6 -1
- data/vendor/faiss/faiss/IndexIVFSpectralHash.cpp +0 -2
- data/vendor/faiss/faiss/IndexNNDescent.cpp +1 -2
- data/vendor/faiss/faiss/IndexNNDescent.h +0 -1
- data/vendor/faiss/faiss/IndexNSG.cpp +1 -2
- data/vendor/faiss/faiss/IndexPQ.cpp +7 -9
- data/vendor/faiss/faiss/IndexRefine.cpp +1 -1
- data/vendor/faiss/faiss/IndexReplicas.cpp +3 -4
- data/vendor/faiss/faiss/IndexReplicas.h +0 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.cpp +8 -1
- data/vendor/faiss/faiss/IndexRowwiseMinMax.h +7 -0
- data/vendor/faiss/faiss/IndexShards.cpp +26 -109
- data/vendor/faiss/faiss/IndexShards.h +2 -3
- data/vendor/faiss/faiss/IndexShardsIVF.cpp +246 -0
- data/vendor/faiss/faiss/IndexShardsIVF.h +42 -0
- data/vendor/faiss/faiss/MetaIndexes.cpp +86 -0
- data/vendor/faiss/faiss/MetaIndexes.h +29 -0
- data/vendor/faiss/faiss/MetricType.h +14 -0
- data/vendor/faiss/faiss/VectorTransform.cpp +8 -10
- data/vendor/faiss/faiss/VectorTransform.h +1 -3
- data/vendor/faiss/faiss/clone_index.cpp +232 -18
- data/vendor/faiss/faiss/cppcontrib/SaDecodeKernels.h +25 -3
- data/vendor/faiss/faiss/cppcontrib/detail/CoarseBitType.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/detail/UintReader.h +78 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-avx2-inl.h +20 -6
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/Level2-neon-inl.h +21 -7
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMax-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/MinMaxFP16-inl.h +7 -0
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-avx2-inl.h +10 -3
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-inl.h +7 -1
- data/vendor/faiss/faiss/cppcontrib/sa_decode/PQ-neon-inl.h +11 -3
- data/vendor/faiss/faiss/gpu/GpuAutoTune.cpp +25 -2
- data/vendor/faiss/faiss/gpu/GpuCloner.cpp +76 -29
- data/vendor/faiss/faiss/gpu/GpuCloner.h +2 -2
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.h +14 -13
- data/vendor/faiss/faiss/gpu/GpuDistance.h +18 -6
- data/vendor/faiss/faiss/gpu/GpuIndex.h +23 -21
- data/vendor/faiss/faiss/gpu/GpuIndexBinaryFlat.h +10 -10
- data/vendor/faiss/faiss/gpu/GpuIndexFlat.h +11 -12
- data/vendor/faiss/faiss/gpu/GpuIndexIVF.h +29 -50
- data/vendor/faiss/faiss/gpu/GpuIndexIVFFlat.h +3 -3
- data/vendor/faiss/faiss/gpu/GpuIndexIVFPQ.h +8 -8
- data/vendor/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/gpu/impl/IndexUtils.h +2 -5
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.cpp +9 -7
- data/vendor/faiss/faiss/gpu/impl/RemapIndices.h +4 -4
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper-inl.h +2 -2
- data/vendor/faiss/faiss/gpu/perf/IndexWrapper.h +1 -1
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexBinaryFlat.cpp +55 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexFlat.cpp +20 -6
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +95 -25
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +67 -16
- data/vendor/faiss/faiss/gpu/test/TestGpuIndexIVFScalarQuantizer.cpp +4 -4
- data/vendor/faiss/faiss/gpu/test/TestUtils.cpp +7 -7
- data/vendor/faiss/faiss/gpu/test/TestUtils.h +4 -4
- data/vendor/faiss/faiss/gpu/test/demo_ivfpq_indexing_gpu.cpp +1 -1
- data/vendor/faiss/faiss/gpu/utils/DeviceUtils.h +6 -0
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/AdditiveQuantizer.h +9 -9
- data/vendor/faiss/faiss/impl/AuxIndexStructures.cpp +1 -1
- data/vendor/faiss/faiss/impl/AuxIndexStructures.h +2 -7
- data/vendor/faiss/faiss/impl/CodePacker.cpp +67 -0
- data/vendor/faiss/faiss/impl/CodePacker.h +71 -0
- data/vendor/faiss/faiss/impl/DistanceComputer.h +0 -2
- data/vendor/faiss/faiss/impl/HNSW.cpp +3 -7
- data/vendor/faiss/faiss/impl/HNSW.h +6 -9
- data/vendor/faiss/faiss/impl/IDSelector.cpp +1 -1
- data/vendor/faiss/faiss/impl/IDSelector.h +39 -1
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.cpp +62 -51
- data/vendor/faiss/faiss/impl/LocalSearchQuantizer.h +11 -12
- data/vendor/faiss/faiss/impl/NNDescent.cpp +3 -9
- data/vendor/faiss/faiss/impl/NNDescent.h +10 -10
- data/vendor/faiss/faiss/impl/NSG.cpp +1 -6
- data/vendor/faiss/faiss/impl/NSG.h +4 -7
- data/vendor/faiss/faiss/impl/PolysemousTraining.cpp +1 -15
- data/vendor/faiss/faiss/impl/PolysemousTraining.h +11 -10
- data/vendor/faiss/faiss/impl/ProductAdditiveQuantizer.cpp +0 -7
- data/vendor/faiss/faiss/impl/ProductQuantizer.cpp +25 -12
- data/vendor/faiss/faiss/impl/ProductQuantizer.h +2 -4
- data/vendor/faiss/faiss/impl/Quantizer.h +6 -3
- data/vendor/faiss/faiss/impl/ResidualQuantizer.cpp +796 -174
- data/vendor/faiss/faiss/impl/ResidualQuantizer.h +16 -8
- data/vendor/faiss/faiss/impl/ScalarQuantizer.cpp +3 -5
- data/vendor/faiss/faiss/impl/ScalarQuantizer.h +4 -4
- data/vendor/faiss/faiss/impl/ThreadedIndex-inl.h +3 -3
- data/vendor/faiss/faiss/impl/ThreadedIndex.h +4 -4
- data/vendor/faiss/faiss/impl/code_distance/code_distance-avx2.h +291 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance-generic.h +74 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance.h +123 -0
- data/vendor/faiss/faiss/impl/code_distance/code_distance_avx512.h +102 -0
- data/vendor/faiss/faiss/impl/index_read.cpp +13 -10
- data/vendor/faiss/faiss/impl/index_write.cpp +3 -4
- data/vendor/faiss/faiss/impl/kmeans1d.cpp +0 -1
- data/vendor/faiss/faiss/impl/kmeans1d.h +3 -3
- data/vendor/faiss/faiss/impl/lattice_Zn.cpp +1 -1
- data/vendor/faiss/faiss/impl/platform_macros.h +61 -0
- data/vendor/faiss/faiss/impl/pq4_fast_scan.cpp +48 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan.h +18 -4
- data/vendor/faiss/faiss/impl/pq4_fast_scan_search_qbs.cpp +2 -2
- data/vendor/faiss/faiss/index_factory.cpp +8 -10
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.cpp +29 -12
- data/vendor/faiss/faiss/invlists/BlockInvertedLists.h +8 -2
- data/vendor/faiss/faiss/invlists/DirectMap.cpp +1 -1
- data/vendor/faiss/faiss/invlists/DirectMap.h +2 -4
- data/vendor/faiss/faiss/invlists/InvertedLists.cpp +118 -18
- data/vendor/faiss/faiss/invlists/InvertedLists.h +44 -4
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.cpp +3 -3
- data/vendor/faiss/faiss/invlists/OnDiskInvertedLists.h +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.cpp +1 -1
- data/vendor/faiss/faiss/python/python_callbacks.h +1 -1
- data/vendor/faiss/faiss/utils/AlignedTable.h +3 -1
- data/vendor/faiss/faiss/utils/Heap.cpp +139 -3
- data/vendor/faiss/faiss/utils/Heap.h +35 -1
- data/vendor/faiss/faiss/utils/approx_topk/approx_topk.h +84 -0
- data/vendor/faiss/faiss/utils/approx_topk/avx2-inl.h +196 -0
- data/vendor/faiss/faiss/utils/approx_topk/generic.h +138 -0
- data/vendor/faiss/faiss/utils/approx_topk/mode.h +34 -0
- data/vendor/faiss/faiss/utils/approx_topk_hamming/approx_topk_hamming.h +367 -0
- data/vendor/faiss/faiss/utils/distances.cpp +61 -7
- data/vendor/faiss/faiss/utils/distances.h +11 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.cpp +346 -0
- data/vendor/faiss/faiss/utils/distances_fused/avx512.h +36 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.cpp +42 -0
- data/vendor/faiss/faiss/utils/distances_fused/distances_fused.h +40 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.cpp +352 -0
- data/vendor/faiss/faiss/utils/distances_fused/simdlib_based.h +32 -0
- data/vendor/faiss/faiss/utils/distances_simd.cpp +515 -327
- data/vendor/faiss/faiss/utils/extra_distances-inl.h +17 -1
- data/vendor/faiss/faiss/utils/extra_distances.cpp +37 -8
- data/vendor/faiss/faiss/utils/extra_distances.h +2 -1
- data/vendor/faiss/faiss/utils/fp16-fp16c.h +7 -0
- data/vendor/faiss/faiss/utils/fp16-inl.h +7 -0
- data/vendor/faiss/faiss/utils/fp16.h +7 -0
- data/vendor/faiss/faiss/utils/hamming-inl.h +0 -456
- data/vendor/faiss/faiss/utils/hamming.cpp +104 -120
- data/vendor/faiss/faiss/utils/hamming.h +21 -10
- data/vendor/faiss/faiss/utils/hamming_distance/avx2-inl.h +535 -0
- data/vendor/faiss/faiss/utils/hamming_distance/common.h +48 -0
- data/vendor/faiss/faiss/utils/hamming_distance/generic-inl.h +519 -0
- data/vendor/faiss/faiss/utils/hamming_distance/hamdis-inl.h +26 -0
- data/vendor/faiss/faiss/utils/hamming_distance/neon-inl.h +614 -0
- data/vendor/faiss/faiss/utils/partitioning.cpp +21 -25
- data/vendor/faiss/faiss/utils/simdlib_avx2.h +344 -3
- data/vendor/faiss/faiss/utils/simdlib_emulated.h +390 -0
- data/vendor/faiss/faiss/utils/simdlib_neon.h +655 -130
- data/vendor/faiss/faiss/utils/sorting.cpp +692 -0
- data/vendor/faiss/faiss/utils/sorting.h +71 -0
- data/vendor/faiss/faiss/utils/transpose/transpose-avx2-inl.h +165 -0
- data/vendor/faiss/faiss/utils/utils.cpp +4 -176
- data/vendor/faiss/faiss/utils/utils.h +2 -9
- metadata +29 -3
- data/vendor/faiss/faiss/gpu/GpuClonerOptions.cpp +0 -26
|
@@ -33,9 +33,7 @@
|
|
|
33
33
|
|
|
34
34
|
#include <faiss/impl/ProductQuantizer.h>
|
|
35
35
|
|
|
36
|
-
#
|
|
37
|
-
#include <immintrin.h>
|
|
38
|
-
#endif
|
|
36
|
+
#include <faiss/impl/code_distance/code_distance.h>
|
|
39
37
|
|
|
40
38
|
namespace faiss {
|
|
41
39
|
|
|
@@ -51,7 +49,6 @@ IndexIVFPQ::IndexIVFPQ(
|
|
|
51
49
|
size_t nbits_per_idx,
|
|
52
50
|
MetricType metric)
|
|
53
51
|
: IndexIVF(quantizer, d, nlist, 0, metric), pq(d, M, nbits_per_idx) {
|
|
54
|
-
FAISS_THROW_IF_NOT(nbits_per_idx <= 8);
|
|
55
52
|
code_size = pq.code_size;
|
|
56
53
|
invlists->code_size = code_size;
|
|
57
54
|
is_trained = false;
|
|
@@ -198,9 +195,9 @@ void IndexIVFPQ::add_core(
|
|
|
198
195
|
|
|
199
196
|
static float* compute_residuals(
|
|
200
197
|
const Index* quantizer,
|
|
201
|
-
|
|
198
|
+
idx_t n,
|
|
202
199
|
const float* x,
|
|
203
|
-
const
|
|
200
|
+
const idx_t* list_nos) {
|
|
204
201
|
size_t d = quantizer->d;
|
|
205
202
|
float* residuals = new float[n * d];
|
|
206
203
|
// TODO: parallelize?
|
|
@@ -423,6 +420,7 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
423
420
|
const Index* quantizer,
|
|
424
421
|
const ProductQuantizer& pq,
|
|
425
422
|
AlignedTable<float>& precomputed_table,
|
|
423
|
+
bool by_residual,
|
|
426
424
|
bool verbose) {
|
|
427
425
|
size_t nlist = quantizer->ntotal;
|
|
428
426
|
size_t d = quantizer->d;
|
|
@@ -434,10 +432,10 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
434
432
|
}
|
|
435
433
|
|
|
436
434
|
if (use_precomputed_table == 0) { // then choose the type of table
|
|
437
|
-
if (quantizer->metric_type ==
|
|
435
|
+
if (!(quantizer->metric_type == METRIC_L2 && by_residual)) {
|
|
438
436
|
if (verbose) {
|
|
439
437
|
printf("IndexIVFPQ::precompute_table: precomputed "
|
|
440
|
-
"tables
|
|
438
|
+
"tables needed only for L2 metric and by_residual is enabled\n");
|
|
441
439
|
}
|
|
442
440
|
precomputed_table.resize(0);
|
|
443
441
|
return;
|
|
@@ -516,13 +514,16 @@ void initialize_IVFPQ_precomputed_table(
|
|
|
516
514
|
|
|
517
515
|
void IndexIVFPQ::precompute_table() {
|
|
518
516
|
initialize_IVFPQ_precomputed_table(
|
|
519
|
-
use_precomputed_table,
|
|
517
|
+
use_precomputed_table,
|
|
518
|
+
quantizer,
|
|
519
|
+
pq,
|
|
520
|
+
precomputed_table,
|
|
521
|
+
by_residual,
|
|
522
|
+
verbose);
|
|
520
523
|
}
|
|
521
524
|
|
|
522
525
|
namespace {
|
|
523
526
|
|
|
524
|
-
using idx_t = Index::idx_t;
|
|
525
|
-
|
|
526
527
|
#define TIC t0 = get_cycles()
|
|
527
528
|
#define TOC get_cycles() - t0
|
|
528
529
|
|
|
@@ -623,7 +624,7 @@ struct QueryTables {
|
|
|
623
624
|
*****************************************************/
|
|
624
625
|
|
|
625
626
|
// fields specific to list
|
|
626
|
-
|
|
627
|
+
idx_t key;
|
|
627
628
|
float coarse_dis;
|
|
628
629
|
std::vector<uint8_t> q_code;
|
|
629
630
|
|
|
@@ -886,140 +887,29 @@ struct IVFPQScannerT : QueryTables {
|
|
|
886
887
|
* Scanning the codes: simple PQ scan.
|
|
887
888
|
*****************************************************/
|
|
888
889
|
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
}
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
type inline distance_single_code(const uint8_t* code) const {
|
|
913
|
-
float result = 0;
|
|
914
|
-
|
|
915
|
-
size_t m = 0;
|
|
916
|
-
const size_t pqM16 = pq.M / 16;
|
|
917
|
-
|
|
918
|
-
const float* tab = sim_table;
|
|
919
|
-
|
|
920
|
-
if (pqM16 > 0) {
|
|
921
|
-
// process 16 values per loop
|
|
922
|
-
|
|
923
|
-
const __m256i ksub = _mm256_set1_epi32(pq.ksub);
|
|
924
|
-
__m256i offsets_0 = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
|
|
925
|
-
offsets_0 = _mm256_mullo_epi32(offsets_0, ksub);
|
|
926
|
-
|
|
927
|
-
// accumulators of partial sums
|
|
928
|
-
__m256 partialSum = _mm256_setzero_ps();
|
|
929
|
-
|
|
930
|
-
// loop
|
|
931
|
-
for (m = 0; m < pqM16 * 16; m += 16) {
|
|
932
|
-
// load 16 uint8 values
|
|
933
|
-
const __m128i mm1 =
|
|
934
|
-
_mm_loadu_si128((const __m128i_u*)(code + m));
|
|
935
|
-
{
|
|
936
|
-
// convert uint8 values (low part of __m128i) to int32
|
|
937
|
-
// values
|
|
938
|
-
const __m256i idx1 = _mm256_cvtepu8_epi32(mm1);
|
|
939
|
-
|
|
940
|
-
// add offsets
|
|
941
|
-
const __m256i indices_to_read_from =
|
|
942
|
-
_mm256_add_epi32(idx1, offsets_0);
|
|
943
|
-
|
|
944
|
-
// gather 8 values, similar to 8 operations of tab[idx]
|
|
945
|
-
__m256 collected = _mm256_i32gather_ps(
|
|
946
|
-
tab, indices_to_read_from, sizeof(float));
|
|
947
|
-
tab += pq.ksub * 8;
|
|
948
|
-
|
|
949
|
-
// collect partial sums
|
|
950
|
-
partialSum = _mm256_add_ps(partialSum, collected);
|
|
951
|
-
}
|
|
952
|
-
|
|
953
|
-
// move high 8 uint8 to low ones
|
|
954
|
-
const __m128i mm2 =
|
|
955
|
-
_mm_unpackhi_epi64(mm1, _mm_setzero_si128());
|
|
956
|
-
{
|
|
957
|
-
// convert uint8 values (low part of __m128i) to int32
|
|
958
|
-
// values
|
|
959
|
-
const __m256i idx1 = _mm256_cvtepu8_epi32(mm2);
|
|
960
|
-
|
|
961
|
-
// add offsets
|
|
962
|
-
const __m256i indices_to_read_from =
|
|
963
|
-
_mm256_add_epi32(idx1, offsets_0);
|
|
964
|
-
|
|
965
|
-
// gather 8 values, similar to 8 operations of tab[idx]
|
|
966
|
-
__m256 collected = _mm256_i32gather_ps(
|
|
967
|
-
tab, indices_to_read_from, sizeof(float));
|
|
968
|
-
tab += pq.ksub * 8;
|
|
969
|
-
|
|
970
|
-
// collect partial sums
|
|
971
|
-
partialSum = _mm256_add_ps(partialSum, collected);
|
|
972
|
-
}
|
|
973
|
-
}
|
|
974
|
-
|
|
975
|
-
// horizontal sum for partialSum
|
|
976
|
-
const __m256 h0 = _mm256_hadd_ps(partialSum, partialSum);
|
|
977
|
-
const __m256 h1 = _mm256_hadd_ps(h0, h0);
|
|
978
|
-
|
|
979
|
-
// extract high and low __m128 regs from __m256
|
|
980
|
-
const __m128 h2 = _mm256_extractf128_ps(h1, 1);
|
|
981
|
-
const __m128 h3 = _mm256_castps256_ps128(h1);
|
|
982
|
-
|
|
983
|
-
// get a final hsum into all 4 regs
|
|
984
|
-
const __m128 h4 = _mm_add_ss(h2, h3);
|
|
985
|
-
|
|
986
|
-
// extract f[0] from __m128
|
|
987
|
-
const float hsum = _mm_cvtss_f32(h4);
|
|
988
|
-
result += hsum;
|
|
989
|
-
}
|
|
990
|
-
|
|
991
|
-
//
|
|
992
|
-
if (m < pq.M) {
|
|
993
|
-
// process leftovers
|
|
994
|
-
PQDecoder decoder(code + m, pq.nbits);
|
|
995
|
-
|
|
996
|
-
for (; m < pq.M; m++) {
|
|
997
|
-
result += tab[decoder.decode()];
|
|
998
|
-
tab += pq.ksub;
|
|
999
|
-
}
|
|
1000
|
-
}
|
|
1001
|
-
|
|
1002
|
-
return result;
|
|
1003
|
-
}
|
|
1004
|
-
|
|
1005
|
-
#else
|
|
1006
|
-
/// Returns the distance to a single code.
|
|
1007
|
-
/// General-purpose version.
|
|
1008
|
-
template <class SearchResultType>
|
|
1009
|
-
inline float distance_single_code(const uint8_t* code) const {
|
|
1010
|
-
PQDecoder decoder(code, pq.nbits);
|
|
1011
|
-
|
|
1012
|
-
const float* tab = sim_table;
|
|
1013
|
-
float result = 0;
|
|
1014
|
-
|
|
1015
|
-
for (size_t m = 0; m < pq.M; m++) {
|
|
1016
|
-
result += tab[decoder.decode()];
|
|
1017
|
-
tab += pq.ksub;
|
|
1018
|
-
}
|
|
1019
|
-
|
|
1020
|
-
return result;
|
|
1021
|
-
}
|
|
1022
|
-
#endif
|
|
890
|
+
// This is the baseline version of scan_list_with_table().
|
|
891
|
+
// It demonstrates what this function actually does.
|
|
892
|
+
//
|
|
893
|
+
// /// version of the scan where we use precomputed tables.
|
|
894
|
+
// template <class SearchResultType>
|
|
895
|
+
// void scan_list_with_table(
|
|
896
|
+
// size_t ncode,
|
|
897
|
+
// const uint8_t* codes,
|
|
898
|
+
// SearchResultType& res) const {
|
|
899
|
+
//
|
|
900
|
+
// for (size_t j = 0; j < ncode; j++, codes += pq.code_size) {
|
|
901
|
+
// if (res.skip_entry(j)) {
|
|
902
|
+
// continue;
|
|
903
|
+
// }
|
|
904
|
+
// float dis = dis0 + distance_single_code<PQDecoder>(
|
|
905
|
+
// pq, sim_table, codes);
|
|
906
|
+
// res.add(j, dis);
|
|
907
|
+
// }
|
|
908
|
+
// }
|
|
909
|
+
|
|
910
|
+
// This is the modified version of scan_list_with_table().
|
|
911
|
+
// It was observed that doing manual unrolling of the loop that
|
|
912
|
+
// utilizes distance_single_code() speeds up the computations.
|
|
1023
913
|
|
|
1024
914
|
/// version of the scan where we use precomputed tables.
|
|
1025
915
|
template <class SearchResultType>
|
|
@@ -1027,12 +917,65 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1027
917
|
size_t ncode,
|
|
1028
918
|
const uint8_t* codes,
|
|
1029
919
|
SearchResultType& res) const {
|
|
1030
|
-
|
|
920
|
+
int counter = 0;
|
|
921
|
+
|
|
922
|
+
size_t saved_j[4] = {0, 0, 0, 0};
|
|
923
|
+
for (size_t j = 0; j < ncode; j++) {
|
|
1031
924
|
if (res.skip_entry(j)) {
|
|
1032
925
|
continue;
|
|
1033
926
|
}
|
|
1034
|
-
|
|
1035
|
-
|
|
927
|
+
|
|
928
|
+
saved_j[0] = (counter == 0) ? j : saved_j[0];
|
|
929
|
+
saved_j[1] = (counter == 1) ? j : saved_j[1];
|
|
930
|
+
saved_j[2] = (counter == 2) ? j : saved_j[2];
|
|
931
|
+
saved_j[3] = (counter == 3) ? j : saved_j[3];
|
|
932
|
+
|
|
933
|
+
counter += 1;
|
|
934
|
+
if (counter == 4) {
|
|
935
|
+
float distance_0 = 0;
|
|
936
|
+
float distance_1 = 0;
|
|
937
|
+
float distance_2 = 0;
|
|
938
|
+
float distance_3 = 0;
|
|
939
|
+
distance_four_codes<PQDecoder>(
|
|
940
|
+
pq,
|
|
941
|
+
sim_table,
|
|
942
|
+
codes + saved_j[0] * pq.code_size,
|
|
943
|
+
codes + saved_j[1] * pq.code_size,
|
|
944
|
+
codes + saved_j[2] * pq.code_size,
|
|
945
|
+
codes + saved_j[3] * pq.code_size,
|
|
946
|
+
distance_0,
|
|
947
|
+
distance_1,
|
|
948
|
+
distance_2,
|
|
949
|
+
distance_3);
|
|
950
|
+
|
|
951
|
+
res.add(saved_j[0], dis0 + distance_0);
|
|
952
|
+
res.add(saved_j[1], dis0 + distance_1);
|
|
953
|
+
res.add(saved_j[2], dis0 + distance_2);
|
|
954
|
+
res.add(saved_j[3], dis0 + distance_3);
|
|
955
|
+
counter = 0;
|
|
956
|
+
}
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
if (counter >= 1) {
|
|
960
|
+
float dis =
|
|
961
|
+
dis0 +
|
|
962
|
+
distance_single_code<PQDecoder>(
|
|
963
|
+
pq, sim_table, codes + saved_j[0] * pq.code_size);
|
|
964
|
+
res.add(saved_j[0], dis);
|
|
965
|
+
}
|
|
966
|
+
if (counter >= 2) {
|
|
967
|
+
float dis =
|
|
968
|
+
dis0 +
|
|
969
|
+
distance_single_code<PQDecoder>(
|
|
970
|
+
pq, sim_table, codes + saved_j[1] * pq.code_size);
|
|
971
|
+
res.add(saved_j[1], dis);
|
|
972
|
+
}
|
|
973
|
+
if (counter >= 3) {
|
|
974
|
+
float dis =
|
|
975
|
+
dis0 +
|
|
976
|
+
distance_single_code<PQDecoder>(
|
|
977
|
+
pq, sim_table, codes + saved_j[2] * pq.code_size);
|
|
978
|
+
res.add(saved_j[2], dis);
|
|
1036
979
|
}
|
|
1037
980
|
}
|
|
1038
981
|
|
|
@@ -1101,6 +1044,46 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1101
1044
|
* Scanning codes with polysemous filtering
|
|
1102
1045
|
*****************************************************/
|
|
1103
1046
|
|
|
1047
|
+
// This is the baseline version of scan_list_polysemous_hc().
|
|
1048
|
+
// It demonstrates what this function actually does.
|
|
1049
|
+
|
|
1050
|
+
// template <class HammingComputer, class SearchResultType>
|
|
1051
|
+
// void scan_list_polysemous_hc(
|
|
1052
|
+
// size_t ncode,
|
|
1053
|
+
// const uint8_t* codes,
|
|
1054
|
+
// SearchResultType& res) const {
|
|
1055
|
+
// int ht = ivfpq.polysemous_ht;
|
|
1056
|
+
// size_t n_hamming_pass = 0, nup = 0;
|
|
1057
|
+
//
|
|
1058
|
+
// int code_size = pq.code_size;
|
|
1059
|
+
//
|
|
1060
|
+
// HammingComputer hc(q_code.data(), code_size);
|
|
1061
|
+
//
|
|
1062
|
+
// for (size_t j = 0; j < ncode; j++, codes += code_size) {
|
|
1063
|
+
// if (res.skip_entry(j)) {
|
|
1064
|
+
// continue;
|
|
1065
|
+
// }
|
|
1066
|
+
// const uint8_t* b_code = codes;
|
|
1067
|
+
// int hd = hc.hamming(b_code);
|
|
1068
|
+
// if (hd < ht) {
|
|
1069
|
+
// n_hamming_pass++;
|
|
1070
|
+
//
|
|
1071
|
+
// float dis =
|
|
1072
|
+
// dis0 +
|
|
1073
|
+
// distance_single_code<PQDecoder>(
|
|
1074
|
+
// pq, sim_table, codes);
|
|
1075
|
+
//
|
|
1076
|
+
// res.add(j, dis);
|
|
1077
|
+
// }
|
|
1078
|
+
// }
|
|
1079
|
+
// #pragma omp critical
|
|
1080
|
+
// { indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
|
1081
|
+
// }
|
|
1082
|
+
|
|
1083
|
+
// This is the modified version of scan_list_polysemous_hc().
|
|
1084
|
+
// It was observed that doing manual unrolling of the loop that
|
|
1085
|
+
// utilizes distance_single_code() speeds up the computations.
|
|
1086
|
+
|
|
1104
1087
|
template <class HammingComputer, class SearchResultType>
|
|
1105
1088
|
void scan_list_polysemous_hc(
|
|
1106
1089
|
size_t ncode,
|
|
@@ -1111,23 +1094,103 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1111
1094
|
|
|
1112
1095
|
int code_size = pq.code_size;
|
|
1113
1096
|
|
|
1097
|
+
size_t saved_j[8];
|
|
1098
|
+
int counter = 0;
|
|
1099
|
+
|
|
1114
1100
|
HammingComputer hc(q_code.data(), code_size);
|
|
1115
1101
|
|
|
1116
|
-
for (size_t j = 0; j < ncode; j
|
|
1102
|
+
for (size_t j = 0; j < (ncode / 4) * 4; j += 4) {
|
|
1103
|
+
const uint8_t* b_code = codes + j * code_size;
|
|
1104
|
+
|
|
1105
|
+
// Unrolling is key here. Basically, doing multiple popcount
|
|
1106
|
+
// operations one after another speeds things up.
|
|
1107
|
+
|
|
1108
|
+
// 99999999 is just an arbitrary large number
|
|
1109
|
+
int hd0 = (res.skip_entry(j + 0))
|
|
1110
|
+
? 99999999
|
|
1111
|
+
: hc.hamming(b_code + 0 * code_size);
|
|
1112
|
+
int hd1 = (res.skip_entry(j + 1))
|
|
1113
|
+
? 99999999
|
|
1114
|
+
: hc.hamming(b_code + 1 * code_size);
|
|
1115
|
+
int hd2 = (res.skip_entry(j + 2))
|
|
1116
|
+
? 99999999
|
|
1117
|
+
: hc.hamming(b_code + 2 * code_size);
|
|
1118
|
+
int hd3 = (res.skip_entry(j + 3))
|
|
1119
|
+
? 99999999
|
|
1120
|
+
: hc.hamming(b_code + 3 * code_size);
|
|
1121
|
+
|
|
1122
|
+
saved_j[counter] = j + 0;
|
|
1123
|
+
counter = (hd0 < ht) ? (counter + 1) : counter;
|
|
1124
|
+
saved_j[counter] = j + 1;
|
|
1125
|
+
counter = (hd1 < ht) ? (counter + 1) : counter;
|
|
1126
|
+
saved_j[counter] = j + 2;
|
|
1127
|
+
counter = (hd2 < ht) ? (counter + 1) : counter;
|
|
1128
|
+
saved_j[counter] = j + 3;
|
|
1129
|
+
counter = (hd3 < ht) ? (counter + 1) : counter;
|
|
1130
|
+
|
|
1131
|
+
if (counter >= 4) {
|
|
1132
|
+
// process four codes at the same time
|
|
1133
|
+
n_hamming_pass += 4;
|
|
1134
|
+
|
|
1135
|
+
float distance_0 = dis0;
|
|
1136
|
+
float distance_1 = dis0;
|
|
1137
|
+
float distance_2 = dis0;
|
|
1138
|
+
float distance_3 = dis0;
|
|
1139
|
+
distance_four_codes<PQDecoder>(
|
|
1140
|
+
pq,
|
|
1141
|
+
sim_table,
|
|
1142
|
+
codes + saved_j[0] * pq.code_size,
|
|
1143
|
+
codes + saved_j[1] * pq.code_size,
|
|
1144
|
+
codes + saved_j[2] * pq.code_size,
|
|
1145
|
+
codes + saved_j[3] * pq.code_size,
|
|
1146
|
+
distance_0,
|
|
1147
|
+
distance_1,
|
|
1148
|
+
distance_2,
|
|
1149
|
+
distance_3);
|
|
1150
|
+
|
|
1151
|
+
res.add(saved_j[0], dis0 + distance_0);
|
|
1152
|
+
res.add(saved_j[1], dis0 + distance_1);
|
|
1153
|
+
res.add(saved_j[2], dis0 + distance_2);
|
|
1154
|
+
res.add(saved_j[3], dis0 + distance_3);
|
|
1155
|
+
|
|
1156
|
+
//
|
|
1157
|
+
counter -= 4;
|
|
1158
|
+
saved_j[0] = saved_j[4];
|
|
1159
|
+
saved_j[1] = saved_j[5];
|
|
1160
|
+
saved_j[2] = saved_j[6];
|
|
1161
|
+
saved_j[3] = saved_j[7];
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
for (size_t kk = 0; kk < counter; kk++) {
|
|
1166
|
+
n_hamming_pass++;
|
|
1167
|
+
|
|
1168
|
+
float dis =
|
|
1169
|
+
dis0 +
|
|
1170
|
+
distance_single_code<PQDecoder>(
|
|
1171
|
+
pq, sim_table, codes + saved_j[kk] * pq.code_size);
|
|
1172
|
+
|
|
1173
|
+
res.add(saved_j[kk], dis);
|
|
1174
|
+
}
|
|
1175
|
+
|
|
1176
|
+
// process leftovers
|
|
1177
|
+
for (size_t j = (ncode / 4) * 4; j < ncode; j++) {
|
|
1117
1178
|
if (res.skip_entry(j)) {
|
|
1118
1179
|
continue;
|
|
1119
1180
|
}
|
|
1120
|
-
const uint8_t* b_code = codes;
|
|
1181
|
+
const uint8_t* b_code = codes + j * code_size;
|
|
1121
1182
|
int hd = hc.hamming(b_code);
|
|
1122
1183
|
if (hd < ht) {
|
|
1123
1184
|
n_hamming_pass++;
|
|
1124
1185
|
|
|
1125
|
-
float dis =
|
|
1126
|
-
|
|
1186
|
+
float dis = dis0 +
|
|
1187
|
+
distance_single_code<PQDecoder>(
|
|
1188
|
+
pq, sim_table, codes + j * code_size);
|
|
1127
1189
|
|
|
1128
1190
|
res.add(j, dis);
|
|
1129
1191
|
}
|
|
1130
1192
|
}
|
|
1193
|
+
|
|
1131
1194
|
#pragma omp critical
|
|
1132
1195
|
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
|
|
1133
1196
|
}
|
|
@@ -1171,7 +1234,7 @@ struct IVFPQScannerT : QueryTables {
|
|
|
1171
1234
|
* use_sel: store or ignore the IDSelector
|
|
1172
1235
|
*/
|
|
1173
1236
|
template <MetricType METRIC_TYPE, class C, class PQDecoder, bool use_sel>
|
|
1174
|
-
struct IVFPQScanner : IVFPQScannerT<
|
|
1237
|
+
struct IVFPQScanner : IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>,
|
|
1175
1238
|
InvertedListScanner {
|
|
1176
1239
|
int precompute_mode;
|
|
1177
1240
|
const IDSelector* sel;
|
|
@@ -1181,9 +1244,7 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1181
1244
|
bool store_pairs,
|
|
1182
1245
|
int precompute_mode,
|
|
1183
1246
|
const IDSelector* sel)
|
|
1184
|
-
: IVFPQScannerT<
|
|
1185
|
-
ivfpq,
|
|
1186
|
-
nullptr),
|
|
1247
|
+
: IVFPQScannerT<idx_t, METRIC_TYPE, PQDecoder>(ivfpq, nullptr),
|
|
1187
1248
|
precompute_mode(precompute_mode),
|
|
1188
1249
|
sel(sel) {
|
|
1189
1250
|
this->store_pairs = store_pairs;
|
|
@@ -1200,14 +1261,9 @@ struct IVFPQScanner : IVFPQScannerT<Index::idx_t, METRIC_TYPE, PQDecoder>,
|
|
|
1200
1261
|
|
|
1201
1262
|
float distance_to_code(const uint8_t* code) const override {
|
|
1202
1263
|
assert(precompute_mode == 2);
|
|
1203
|
-
float dis = this->dis0
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
for (size_t m = 0; m < this->pq.M; m++) {
|
|
1208
|
-
dis += tab[decoder.decode()];
|
|
1209
|
-
tab += this->pq.ksub;
|
|
1210
|
-
}
|
|
1264
|
+
float dis = this->dis0 +
|
|
1265
|
+
distance_single_code<PQDecoder>(
|
|
1266
|
+
this->pq, this->sim_table, code);
|
|
1211
1267
|
return dis;
|
|
1212
1268
|
}
|
|
1213
1269
|
|
|
@@ -156,7 +156,12 @@ void IndexIVFPQFastScan::train_residual(idx_t n, const float* x_in) {
|
|
|
156
156
|
|
|
157
157
|
void IndexIVFPQFastScan::precompute_table() {
|
|
158
158
|
initialize_IVFPQ_precomputed_table(
|
|
159
|
-
use_precomputed_table,
|
|
159
|
+
use_precomputed_table,
|
|
160
|
+
quantizer,
|
|
161
|
+
pq,
|
|
162
|
+
precomputed_table,
|
|
163
|
+
by_residual,
|
|
164
|
+
verbose);
|
|
160
165
|
}
|
|
161
166
|
|
|
162
167
|
/*********************************************************
|
|
@@ -50,7 +50,6 @@ int sgemm_(
|
|
|
50
50
|
|
|
51
51
|
namespace faiss {
|
|
52
52
|
|
|
53
|
-
using idx_t = Index::idx_t;
|
|
54
53
|
using storage_idx_t = NNDescent::storage_idx_t;
|
|
55
54
|
|
|
56
55
|
/**************************************************************
|
|
@@ -89,7 +88,7 @@ struct NegativeDistanceComputer : DistanceComputer {
|
|
|
89
88
|
};
|
|
90
89
|
|
|
91
90
|
DistanceComputer* storage_distance_computer(const Index* storage) {
|
|
92
|
-
if (storage->metric_type
|
|
91
|
+
if (is_similarity_metric(storage->metric_type)) {
|
|
93
92
|
return new NegativeDistanceComputer(storage->get_distance_computer());
|
|
94
93
|
} else {
|
|
95
94
|
return storage->get_distance_computer();
|
|
@@ -23,7 +23,6 @@
|
|
|
23
23
|
|
|
24
24
|
namespace faiss {
|
|
25
25
|
|
|
26
|
-
using idx_t = Index::idx_t;
|
|
27
26
|
using namespace nsg;
|
|
28
27
|
|
|
29
28
|
/**************************************************************
|
|
@@ -113,7 +112,7 @@ void IndexNSG::search(
|
|
|
113
112
|
InterruptCallback::check();
|
|
114
113
|
}
|
|
115
114
|
|
|
116
|
-
if (metric_type
|
|
115
|
+
if (is_similarity_metric(metric_type)) {
|
|
117
116
|
// we need to revert the negated distances
|
|
118
117
|
for (size_t i = 0; i < k * n; i++) {
|
|
119
118
|
distances[i] = -distances[i];
|
|
@@ -19,6 +19,8 @@
|
|
|
19
19
|
#include <faiss/impl/FaissAssert.h>
|
|
20
20
|
#include <faiss/utils/hamming.h>
|
|
21
21
|
|
|
22
|
+
#include <faiss/impl/code_distance/code_distance.h>
|
|
23
|
+
|
|
22
24
|
namespace faiss {
|
|
23
25
|
|
|
24
26
|
/*********************************************************
|
|
@@ -74,22 +76,18 @@ template <class PQDecoder>
|
|
|
74
76
|
struct PQDistanceComputer : FlatCodesDistanceComputer {
|
|
75
77
|
size_t d;
|
|
76
78
|
MetricType metric;
|
|
77
|
-
|
|
79
|
+
idx_t nb;
|
|
78
80
|
const ProductQuantizer& pq;
|
|
79
81
|
const float* sdc;
|
|
80
82
|
std::vector<float> precomputed_table;
|
|
81
83
|
size_t ndis;
|
|
82
84
|
|
|
83
85
|
float distance_to_code(const uint8_t* code) final {
|
|
84
|
-
const float* dt = precomputed_table.data();
|
|
85
|
-
PQDecoder decoder(code, pq.nbits);
|
|
86
|
-
float accu = 0;
|
|
87
|
-
for (int j = 0; j < pq.M; j++) {
|
|
88
|
-
accu += dt[decoder.decode()];
|
|
89
|
-
dt += 1 << decoder.nbits;
|
|
90
|
-
}
|
|
91
86
|
ndis++;
|
|
92
|
-
|
|
87
|
+
|
|
88
|
+
float dis = distance_single_code<PQDecoder>(
|
|
89
|
+
pq, precomputed_table.data(), code);
|
|
90
|
+
return dis;
|
|
93
91
|
}
|
|
94
92
|
|
|
95
93
|
float symmetric_dis(idx_t i, idx_t j) override {
|
|
@@ -123,14 +123,13 @@ void IndexReplicasTemplate<IndexT>::search(
|
|
|
123
123
|
size_t componentsPerVec = sizeof(component_t) == 1 ? (dim + 7) / 8 : dim;
|
|
124
124
|
|
|
125
125
|
// Partition the query by the number of indices we have
|
|
126
|
-
faiss::
|
|
127
|
-
(faiss::
|
|
128
|
-
(faiss::Index::idx_t)this->count();
|
|
126
|
+
faiss::idx_t queriesPerIndex =
|
|
127
|
+
(faiss::idx_t)(n + this->count() - 1) / (faiss::idx_t)this->count();
|
|
129
128
|
FAISS_ASSERT(n / queriesPerIndex <= this->count());
|
|
130
129
|
|
|
131
130
|
auto fn = [queriesPerIndex, componentsPerVec, n, x, k, distances, labels](
|
|
132
131
|
int i, const IndexT* index) {
|
|
133
|
-
faiss::
|
|
132
|
+
faiss::idx_t base = (faiss::idx_t)i * queriesPerIndex;
|
|
134
133
|
|
|
135
134
|
if (base < n) {
|
|
136
135
|
auto numForIndex = std::min(queriesPerIndex, n - base);
|
|
@@ -20,7 +20,6 @@ namespace faiss {
|
|
|
20
20
|
template <typename IndexT>
|
|
21
21
|
class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
|
|
22
22
|
public:
|
|
23
|
-
using idx_t = typename IndexT::idx_t;
|
|
24
23
|
using component_t = typename IndexT::component_t;
|
|
25
24
|
using distance_t = typename IndexT::distance_t;
|
|
26
25
|
|
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
3
|
+
*
|
|
4
|
+
* This source code is licensed under the MIT license found in the
|
|
5
|
+
* LICENSE file in the root directory of this source tree.
|
|
6
|
+
*/
|
|
7
|
+
|
|
1
8
|
#include <faiss/IndexRowwiseMinMax.h>
|
|
2
9
|
|
|
3
10
|
#include <cstdint>
|
|
@@ -11,7 +18,7 @@ namespace faiss {
|
|
|
11
18
|
|
|
12
19
|
namespace {
|
|
13
20
|
|
|
14
|
-
using idx_t = faiss::
|
|
21
|
+
using idx_t = faiss::idx_t;
|
|
15
22
|
|
|
16
23
|
struct StorageMinMaxFP16 {
|
|
17
24
|
uint16_t scaler;
|