llama_cpp 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/llama_cpp.cpp +71 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +9 -0
- data/vendor/tmp/llama.cpp/Makefile +28 -12
- data/vendor/tmp/llama.cpp/ggml-alloc.c +45 -64
- data/vendor/tmp/llama.cpp/ggml-alloc.h +13 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +14 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +358 -135
- data/vendor/tmp/llama.cpp/ggml-backend.h +41 -17
- data/vendor/tmp/llama.cpp/ggml-common.h +1830 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +187 -1033
- data/vendor/tmp/llama.cpp/ggml-impl.h +6 -2
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +42 -20
- data/vendor/tmp/llama.cpp/ggml-metal.metal +44 -910
- data/vendor/tmp/llama.cpp/ggml-quants.c +457 -1074
- data/vendor/tmp/llama.cpp/ggml-quants.h +27 -259
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +388 -565
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +6 -39
- data/vendor/tmp/llama.cpp/ggml.c +509 -343
- data/vendor/tmp/llama.cpp/ggml.h +61 -47
- data/vendor/tmp/llama.cpp/llama.cpp +1446 -687
- data/vendor/tmp/llama.cpp/llama.h +25 -11
- data/vendor/tmp/llama.cpp/unicode.cpp +1672 -0
- data/vendor/tmp/llama.cpp/unicode.h +16 -774
- metadata +4 -2
@@ -1,6 +1,12 @@
|
|
1
|
+
#define GGML_COMMON_IMPL_C
|
2
|
+
#include "ggml-common.h"
|
3
|
+
|
1
4
|
#include "ggml-quants.h"
|
2
5
|
#include "ggml-impl.h"
|
3
6
|
|
7
|
+
#define GGML_COMMON_IMPL_C
|
8
|
+
#include "ggml-common.h"
|
9
|
+
|
4
10
|
#include <math.h>
|
5
11
|
#include <string.h>
|
6
12
|
#include <assert.h>
|
@@ -948,7 +954,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
|
|
948
954
|
const float d = amax / ((1 << 7) - 1);
|
949
955
|
const float id = d ? 1.0f/d : 0.0f;
|
950
956
|
|
951
|
-
y[i].d = d;
|
957
|
+
y[i].d = GGML_FP32_TO_FP16(d);
|
952
958
|
|
953
959
|
int sum = 0;
|
954
960
|
|
@@ -963,7 +969,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
|
|
963
969
|
sum += y[i].qs[QK8_1/2 + j];
|
964
970
|
}
|
965
971
|
|
966
|
-
y[i].s = sum*d;
|
972
|
+
y[i].s = GGML_FP32_TO_FP16(sum*d);
|
967
973
|
}
|
968
974
|
}
|
969
975
|
|
@@ -991,7 +997,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
991
997
|
const float d = amax / ((1 << 7) - 1);
|
992
998
|
const float id = d ? 1.0f/d : 0.0f;
|
993
999
|
|
994
|
-
y[i].d = d;
|
1000
|
+
y[i].d = GGML_FP32_TO_FP16(d);
|
995
1001
|
|
996
1002
|
int32x4_t accv = vdupq_n_s32(0);
|
997
1003
|
|
@@ -1007,7 +1013,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1007
1013
|
accv = vaddq_s32(accv, vi);
|
1008
1014
|
}
|
1009
1015
|
|
1010
|
-
y[i].s = d * vaddvq_s32(accv);
|
1016
|
+
y[i].s = GGML_FP32_TO_FP16(d * vaddvq_s32(accv));
|
1011
1017
|
}
|
1012
1018
|
#elif defined(__wasm_simd128__)
|
1013
1019
|
for (int i = 0; i < nb; i++) {
|
@@ -1030,7 +1036,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1030
1036
|
const float d = amax / ((1 << 7) - 1);
|
1031
1037
|
const float id = d ? 1.0f/d : 0.0f;
|
1032
1038
|
|
1033
|
-
y[i].d = d;
|
1039
|
+
y[i].d = GGML_FP32_TO_FP16(d);
|
1034
1040
|
|
1035
1041
|
v128_t accv = wasm_i32x4_splat(0);
|
1036
1042
|
|
@@ -1046,10 +1052,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1046
1052
|
accv = wasm_i32x4_add(accv, vi);
|
1047
1053
|
}
|
1048
1054
|
|
1049
|
-
y[i].s =
|
1050
|
-
|
1051
|
-
|
1052
|
-
|
1055
|
+
y[i].s = GGML_FP32_TO_FP16(
|
1056
|
+
d * (wasm_i32x4_extract_lane(accv, 0) +
|
1057
|
+
wasm_i32x4_extract_lane(accv, 1) +
|
1058
|
+
wasm_i32x4_extract_lane(accv, 2) +
|
1059
|
+
wasm_i32x4_extract_lane(accv, 3)));
|
1053
1060
|
}
|
1054
1061
|
#elif defined(__AVX2__) || defined(__AVX__)
|
1055
1062
|
for (int i = 0; i < nb; i++) {
|
@@ -1074,7 +1081,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1074
1081
|
|
1075
1082
|
// Quantize these floats
|
1076
1083
|
const float d = maxScalar / 127.f;
|
1077
|
-
y[i].d = d;
|
1084
|
+
y[i].d = GGML_FP32_TO_FP16(d);
|
1078
1085
|
const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f;
|
1079
1086
|
const __m256 mul = _mm256_set1_ps( id );
|
1080
1087
|
|
@@ -1098,7 +1105,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1098
1105
|
|
1099
1106
|
#if defined(__AVX2__)
|
1100
1107
|
// Compute the sum of the quants and set y[i].s
|
1101
|
-
y[i].s = d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3)));
|
1108
|
+
y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3))));
|
1102
1109
|
|
1103
1110
|
// Convert int32 to int16
|
1104
1111
|
i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
|
@@ -1128,7 +1135,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1128
1135
|
// Compute the sum of the quants and set y[i].s
|
1129
1136
|
const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3));
|
1130
1137
|
const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7));
|
1131
|
-
y[i].s = d * hsum_i32_4(_mm_add_epi32(s0, s1));
|
1138
|
+
y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1)));
|
1132
1139
|
|
1133
1140
|
// Convert int32 to int16
|
1134
1141
|
ni0 = _mm_packs_epi32( ni0, ni1 );
|
@@ -1159,7 +1166,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1159
1166
|
const float d = amax / ((1 << 7) - 1);
|
1160
1167
|
const float id = d ? 1.0f/d : 0.0f;
|
1161
1168
|
|
1162
|
-
y[i].d = d;
|
1169
|
+
y[i].d = GGML_FP32_TO_FP16(d);
|
1163
1170
|
|
1164
1171
|
vfloat32m4_t x0 = __riscv_vfmul_vf_f32m4(v_x, id, vl);
|
1165
1172
|
|
@@ -1176,7 +1183,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
|
1176
1183
|
|
1177
1184
|
// set y[i].s
|
1178
1185
|
int sum = __riscv_vmv_x_s_i16m1_i16(vwrs);
|
1179
|
-
y[i].s = sum*d;
|
1186
|
+
y[i].s = GGML_FP32_TO_FP16(sum*d);
|
1180
1187
|
}
|
1181
1188
|
#else
|
1182
1189
|
GGML_UNUSED(nb);
|
@@ -1701,16 +1708,6 @@ void quantize_row_q2_K(const float * restrict x, void * restrict vy, int k) {
|
|
1701
1708
|
quantize_row_q2_K_reference(x, vy, k);
|
1702
1709
|
}
|
1703
1710
|
|
1704
|
-
size_t ggml_quantize_q2_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
1705
|
-
(void)hist; // TODO: collect histograms
|
1706
|
-
|
1707
|
-
for (int j = 0; j < n; j += k) {
|
1708
|
-
block_q2_K * restrict y = (block_q2_K *)dst + j/QK_K;
|
1709
|
-
quantize_row_q2_K_reference(src + j, y, k);
|
1710
|
-
}
|
1711
|
-
return (n/QK_K*sizeof(block_q2_K));
|
1712
|
-
}
|
1713
|
-
|
1714
1711
|
static float make_qkx3_quants(int n, int nmax, const float * restrict x, const float * restrict weights,
|
1715
1712
|
uint8_t * restrict L, float * restrict the_min, uint8_t * restrict Laux,
|
1716
1713
|
float rmin, float rdelta, int nstep, bool use_mad) {
|
@@ -1963,8 +1960,7 @@ static void quantize_row_q2_K_impl(const float * restrict x, block_q2_K * restri
|
|
1963
1960
|
}
|
1964
1961
|
}
|
1965
1962
|
|
1966
|
-
size_t quantize_q2_K(const float * src, void * dst, int nrow, int n_per_row,
|
1967
|
-
(void)hist;
|
1963
|
+
size_t quantize_q2_K(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
1968
1964
|
size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);
|
1969
1965
|
if (!quant_weights) {
|
1970
1966
|
quantize_row_q2_K_reference(src, dst, nrow*n_per_row);
|
@@ -2183,16 +2179,6 @@ void quantize_row_q3_K(const float * restrict x, void * restrict vy, int k) {
|
|
2183
2179
|
quantize_row_q3_K_reference(x, vy, k);
|
2184
2180
|
}
|
2185
2181
|
|
2186
|
-
size_t ggml_quantize_q3_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
2187
|
-
(void)hist; // TODO: collect histograms
|
2188
|
-
|
2189
|
-
for (int j = 0; j < n; j += k) {
|
2190
|
-
block_q3_K * restrict y = (block_q3_K *)dst + j/QK_K;
|
2191
|
-
quantize_row_q3_K_reference(src + j, y, k);
|
2192
|
-
}
|
2193
|
-
return (n/QK_K*sizeof(block_q3_K));
|
2194
|
-
}
|
2195
|
-
|
2196
2182
|
static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restrict y, int n_per_row, const float * restrict quant_weights) {
|
2197
2183
|
#if QK_K != 256
|
2198
2184
|
(void)quant_weights;
|
@@ -2282,8 +2268,7 @@ static void quantize_row_q3_K_impl(const float * restrict x, block_q3_K * restri
|
|
2282
2268
|
#endif
|
2283
2269
|
}
|
2284
2270
|
|
2285
|
-
size_t quantize_q3_K(const float * src, void * dst, int nrow, int n_per_row,
|
2286
|
-
(void)hist;
|
2271
|
+
size_t quantize_q3_K(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
2287
2272
|
size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);
|
2288
2273
|
if (!quant_weights) {
|
2289
2274
|
quantize_row_q3_K_reference(src, dst, nrow*n_per_row);
|
@@ -2453,17 +2438,6 @@ void quantize_row_q4_K(const float * restrict x, void * restrict vy, int k) {
|
|
2453
2438
|
quantize_row_q4_K_reference(x, y, k);
|
2454
2439
|
}
|
2455
2440
|
|
2456
|
-
size_t ggml_quantize_q4_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
2457
|
-
assert(k % QK_K == 0);
|
2458
|
-
(void)hist; // TODO: collect histograms
|
2459
|
-
|
2460
|
-
for (int j = 0; j < n; j += k) {
|
2461
|
-
block_q4_K * restrict y = (block_q4_K *)dst + j/QK_K;
|
2462
|
-
quantize_row_q4_K_reference(src + j, y, k);
|
2463
|
-
}
|
2464
|
-
return (n/QK_K*sizeof(block_q4_K));
|
2465
|
-
}
|
2466
|
-
|
2467
2441
|
static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restrict y, int n_per_row, const float * quant_weights) {
|
2468
2442
|
#if QK_K != 256
|
2469
2443
|
(void)quant_weights;
|
@@ -2542,8 +2516,7 @@ static void quantize_row_q4_K_impl(const float * restrict x, block_q4_K * restri
|
|
2542
2516
|
#endif
|
2543
2517
|
}
|
2544
2518
|
|
2545
|
-
size_t quantize_q4_K(const float * src, void * dst, int nrow, int n_per_row,
|
2546
|
-
(void)hist;
|
2519
|
+
size_t quantize_q4_K(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
2547
2520
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);
|
2548
2521
|
if (!quant_weights) {
|
2549
2522
|
quantize_row_q4_K_reference(src, dst, nrow*n_per_row);
|
@@ -2754,17 +2727,6 @@ void quantize_row_q5_K(const float * restrict x, void * restrict vy, int k) {
|
|
2754
2727
|
quantize_row_q5_K_reference(x, y, k);
|
2755
2728
|
}
|
2756
2729
|
|
2757
|
-
size_t ggml_quantize_q5_K(const float * restrict src, void * restrict dst, int n, int k, int64_t * restrict hist) {
|
2758
|
-
assert(k % QK_K == 0);
|
2759
|
-
(void)hist; // TODO: collect histograms
|
2760
|
-
|
2761
|
-
for (int j = 0; j < n; j += k) {
|
2762
|
-
block_q5_K * restrict y = (block_q5_K *)dst + j/QK_K;
|
2763
|
-
quantize_row_q5_K_reference(src + j, y, k);
|
2764
|
-
}
|
2765
|
-
return (n/QK_K*sizeof(block_q5_K));
|
2766
|
-
}
|
2767
|
-
|
2768
2730
|
static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restrict y, int n_per_row, const float * quant_weights) {
|
2769
2731
|
#if QK_K != 256
|
2770
2732
|
(void)quant_weights;
|
@@ -2863,8 +2825,7 @@ static void quantize_row_q5_K_impl(const float * restrict x, block_q5_K * restri
|
|
2863
2825
|
#endif
|
2864
2826
|
}
|
2865
2827
|
|
2866
|
-
size_t quantize_q5_K(const float * src, void * dst, int nrow, int n_per_row,
|
2867
|
-
(void)hist;
|
2828
|
+
size_t quantize_q5_K(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
2868
2829
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);
|
2869
2830
|
if (!quant_weights) {
|
2870
2831
|
quantize_row_q5_K_reference(src, dst, nrow*n_per_row);
|
@@ -3017,17 +2978,6 @@ void quantize_row_q6_K(const float * restrict x, void * restrict vy, int k) {
|
|
3017
2978
|
quantize_row_q6_K_reference(x, y, k);
|
3018
2979
|
}
|
3019
2980
|
|
3020
|
-
size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist) {
|
3021
|
-
assert(k % QK_K == 0);
|
3022
|
-
(void)hist; // TODO: collect histograms
|
3023
|
-
|
3024
|
-
for (int j = 0; j < n; j += k) {
|
3025
|
-
block_q6_K * restrict y = (block_q6_K *)dst + j/QK_K;
|
3026
|
-
quantize_row_q6_K_reference(src + j, y, k);
|
3027
|
-
}
|
3028
|
-
return (n/QK_K*sizeof(block_q6_K));
|
3029
|
-
}
|
3030
|
-
|
3031
2981
|
static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restrict y, int n_per_row, const float * quant_weights) {
|
3032
2982
|
#if QK_K != 256
|
3033
2983
|
(void)quant_weights;
|
@@ -3117,8 +3067,7 @@ static void quantize_row_q6_K_impl(const float * restrict x, block_q6_K * restri
|
|
3117
3067
|
#endif
|
3118
3068
|
}
|
3119
3069
|
|
3120
|
-
size_t quantize_q6_K(const float * src, void * dst, int nrow, int n_per_row,
|
3121
|
-
(void)hist;
|
3070
|
+
size_t quantize_q6_K(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
3122
3071
|
size_t row_size = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);
|
3123
3072
|
if (!quant_weights) {
|
3124
3073
|
quantize_row_q6_K_reference(src, dst, nrow*n_per_row);
|
@@ -3162,9 +3111,10 @@ static void quantize_row_q4_0_impl(const float * restrict x, block_q4_0 * restri
|
|
3162
3111
|
}
|
3163
3112
|
}
|
3164
3113
|
|
3165
|
-
size_t quantize_q4_0(const float * src, void * dst, int nrow, int n_per_row,
|
3114
|
+
size_t quantize_q4_0(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
3166
3115
|
if (!quant_weights) {
|
3167
|
-
|
3116
|
+
quantize_row_q4_0_reference(src, dst, nrow*n_per_row);
|
3117
|
+
return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
|
3168
3118
|
}
|
3169
3119
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
|
3170
3120
|
char * qrow = (char *)dst;
|
@@ -3206,9 +3156,10 @@ static void quantize_row_q4_1_impl(const float * restrict x, block_q4_1 * restri
|
|
3206
3156
|
}
|
3207
3157
|
}
|
3208
3158
|
|
3209
|
-
size_t quantize_q4_1(const float * src, void * dst, int nrow, int n_per_row,
|
3159
|
+
size_t quantize_q4_1(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
3210
3160
|
if (!quant_weights) {
|
3211
|
-
|
3161
|
+
quantize_row_q4_1_reference(src, dst, nrow*n_per_row);
|
3162
|
+
return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
|
3212
3163
|
}
|
3213
3164
|
size_t row_size = ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
|
3214
3165
|
char * qrow = (char *)dst;
|
@@ -3259,9 +3210,10 @@ static void quantize_row_q5_0_impl(const float * restrict x, block_q5_0 * restri
|
|
3259
3210
|
}
|
3260
3211
|
}
|
3261
3212
|
|
3262
|
-
size_t quantize_q5_0(const float * src, void * dst, int nrow, int n_per_row,
|
3213
|
+
size_t quantize_q5_0(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
3263
3214
|
if (!quant_weights) {
|
3264
|
-
|
3215
|
+
quantize_row_q5_0_reference(src, dst, nrow*n_per_row);
|
3216
|
+
return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
|
3265
3217
|
}
|
3266
3218
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
|
3267
3219
|
char * qrow = (char *)dst;
|
@@ -3311,9 +3263,10 @@ static void quantize_row_q5_1_impl(const float * restrict x, block_q5_1 * restri
|
|
3311
3263
|
}
|
3312
3264
|
}
|
3313
3265
|
|
3314
|
-
size_t quantize_q5_1(const float * src, void * dst, int nrow, int n_per_row,
|
3266
|
+
size_t quantize_q5_1(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
3315
3267
|
if (!quant_weights) {
|
3316
|
-
|
3268
|
+
quantize_row_q5_1_reference(src, dst, nrow*n_per_row);
|
3269
|
+
return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
|
3317
3270
|
}
|
3318
3271
|
size_t row_size = ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
|
3319
3272
|
char * qrow = (char *)dst;
|
@@ -3325,712 +3278,14 @@ size_t quantize_q5_1(const float * src, void * dst, int nrow, int n_per_row, int
|
|
3325
3278
|
return nrow * row_size;
|
3326
3279
|
}
|
3327
3280
|
|
3328
|
-
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3334
|
-
0x0808080819081908, 0x0808080819190808, 0x0808080819192b08, 0x08080808192b0819,
|
3335
|
-
0x08080808192b1908, 0x080808082b080808, 0x080808082b08082b, 0x080808082b082b2b,
|
3336
|
-
0x080808082b2b082b, 0x0808081908080819, 0x0808081908081908, 0x0808081908190808,
|
3337
|
-
0x0808081908191919, 0x0808081919080808, 0x080808192b081908, 0x080808192b192b08,
|
3338
|
-
0x0808082b08080808, 0x0808082b0808082b, 0x0808082b082b082b, 0x0808082b2b08082b,
|
3339
|
-
0x0808190808080819, 0x0808190808081908, 0x0808190808190808, 0x08081908082b0819,
|
3340
|
-
0x08081908082b1908, 0x0808190819080808, 0x080819081908082b, 0x0808190819082b08,
|
3341
|
-
0x08081908192b0808, 0x080819082b080819, 0x080819082b081908, 0x080819082b190808,
|
3342
|
-
0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b, 0x0808191908082b08,
|
3343
|
-
0x08081919082b0808, 0x080819191908192b, 0x08081919192b2b19, 0x080819192b080808,
|
3344
|
-
0x080819192b190819, 0x0808192b08082b19, 0x0808192b08190808, 0x0808192b19080808,
|
3345
|
-
0x0808192b2b081908, 0x0808192b2b2b1908, 0x08082b0808080808, 0x08082b0808081919,
|
3346
|
-
0x08082b0808082b08, 0x08082b0808191908, 0x08082b08082b2b08, 0x08082b0819080819,
|
3347
|
-
0x08082b0819081908, 0x08082b0819190808, 0x08082b081919082b, 0x08082b082b082b08,
|
3348
|
-
0x08082b1908081908, 0x08082b1919080808, 0x08082b2b0808082b, 0x08082b2b08191908,
|
3349
|
-
0x0819080808080819, 0x0819080808081908, 0x0819080808190808, 0x08190808082b0819,
|
3350
|
-
0x0819080819080808, 0x08190808192b0808, 0x081908082b081908, 0x081908082b190808,
|
3351
|
-
0x081908082b191919, 0x0819081908080808, 0x0819081908082b08, 0x08190819082b0808,
|
3352
|
-
0x0819081919190808, 0x0819081919192b2b, 0x081908192b080808, 0x0819082b082b1908,
|
3353
|
-
0x0819082b19081919, 0x0819190808080808, 0x0819190808082b08, 0x08191908082b0808,
|
3354
|
-
0x08191908082b1919, 0x0819190819082b19, 0x081919082b080808, 0x0819191908192b08,
|
3355
|
-
0x08191919192b082b, 0x0819192b08080808, 0x0819192b0819192b, 0x08192b0808080819,
|
3356
|
-
0x08192b0808081908, 0x08192b0808190808, 0x08192b0819080808, 0x08192b082b080819,
|
3357
|
-
0x08192b1908080808, 0x08192b1908081919, 0x08192b192b2b0808, 0x08192b2b19190819,
|
3358
|
-
0x082b080808080808, 0x082b08080808082b, 0x082b080808082b2b, 0x082b080819081908,
|
3359
|
-
0x082b0808192b0819, 0x082b08082b080808, 0x082b08082b08082b, 0x082b0819082b2b19,
|
3360
|
-
0x082b081919082b08, 0x082b082b08080808, 0x082b082b0808082b, 0x082b190808080819,
|
3361
|
-
0x082b190808081908, 0x082b190808190808, 0x082b190819080808, 0x082b19081919192b,
|
3362
|
-
0x082b191908080808, 0x082b191919080819, 0x082b1919192b1908, 0x082b192b2b190808,
|
3363
|
-
0x082b2b0808082b08, 0x082b2b08082b0808, 0x082b2b082b191908, 0x082b2b2b19081908,
|
3364
|
-
0x1908080808080819, 0x1908080808081908, 0x1908080808190808, 0x1908080808192b08,
|
3365
|
-
0x19080808082b0819, 0x19080808082b1908, 0x1908080819080808, 0x1908080819082b08,
|
3366
|
-
0x190808081919192b, 0x19080808192b0808, 0x190808082b080819, 0x190808082b081908,
|
3367
|
-
0x190808082b190808, 0x1908081908080808, 0x19080819082b0808, 0x19080819192b0819,
|
3368
|
-
0x190808192b080808, 0x190808192b081919, 0x1908082b08080819, 0x1908082b08190808,
|
3369
|
-
0x1908082b19082b08, 0x1908082b1919192b, 0x1908082b192b2b08, 0x1908190808080808,
|
3370
|
-
0x1908190808082b08, 0x19081908082b0808, 0x190819082b080808, 0x190819082b192b19,
|
3371
|
-
0x190819190819082b, 0x19081919082b1908, 0x1908192b08080808, 0x19082b0808080819,
|
3372
|
-
0x19082b0808081908, 0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919,
|
3373
|
-
0x19082b1908080808, 0x19082b1919192b08, 0x19082b19192b0819, 0x19082b192b08082b,
|
3374
|
-
0x19082b2b19081919, 0x19082b2b2b190808, 0x1919080808080808, 0x1919080808082b08,
|
3375
|
-
0x1919080808190819, 0x1919080808192b19, 0x19190808082b0808, 0x191908082b080808,
|
3376
|
-
0x191908082b082b08, 0x1919081908081908, 0x191908191908082b, 0x191908192b2b1908,
|
3377
|
-
0x1919082b2b190819, 0x191919082b190808, 0x191919082b19082b, 0x1919191908082b2b,
|
3378
|
-
0x1919192b08080819, 0x1919192b19191908, 0x19192b0808080808, 0x19192b0808190819,
|
3379
|
-
0x19192b0808192b19, 0x19192b08192b1908, 0x19192b1919080808, 0x19192b2b08082b08,
|
3380
|
-
0x192b080808081908, 0x192b080808190808, 0x192b080819080808, 0x192b0808192b2b08,
|
3381
|
-
0x192b081908080808, 0x192b081919191919, 0x192b082b08192b08, 0x192b082b192b0808,
|
3382
|
-
0x192b190808080808, 0x192b190808081919, 0x192b191908190808, 0x192b19190819082b,
|
3383
|
-
0x192b19192b081908, 0x192b2b081908082b, 0x2b08080808080808, 0x2b0808080808082b,
|
3384
|
-
0x2b08080808082b2b, 0x2b08080819080819, 0x2b0808082b08082b, 0x2b08081908081908,
|
3385
|
-
0x2b08081908192b08, 0x2b08081919080808, 0x2b08082b08190819, 0x2b08190808080819,
|
3386
|
-
0x2b08190808081908, 0x2b08190808190808, 0x2b08190808191919, 0x2b08190819080808,
|
3387
|
-
0x2b081908192b0808, 0x2b08191908080808, 0x2b0819191908192b, 0x2b0819192b191908,
|
3388
|
-
0x2b08192b08082b19, 0x2b08192b19080808, 0x2b08192b192b0808, 0x2b082b080808082b,
|
3389
|
-
0x2b082b1908081908, 0x2b082b2b08190819, 0x2b19080808081908, 0x2b19080808190808,
|
3390
|
-
0x2b190808082b1908, 0x2b19080819080808, 0x2b1908082b2b0819, 0x2b1908190819192b,
|
3391
|
-
0x2b1908192b080808, 0x2b19082b19081919, 0x2b19190808080808, 0x2b191908082b082b,
|
3392
|
-
0x2b19190819081908, 0x2b19191919190819, 0x2b192b082b080819, 0x2b192b19082b0808,
|
3393
|
-
0x2b2b08080808082b, 0x2b2b080819190808, 0x2b2b08082b081919, 0x2b2b081908082b19,
|
3394
|
-
0x2b2b082b08080808, 0x2b2b190808192b08, 0x2b2b2b0819190808, 0x2b2b2b1908081908,
|
3395
|
-
};
|
3396
|
-
|
3397
|
-
static const uint64_t iq2xs_grid[512] = {
|
3398
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
3399
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
3400
|
-
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
3401
|
-
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
3402
|
-
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
3403
|
-
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x080808082b080808,
|
3404
|
-
0x080808082b08082b, 0x080808082b081919, 0x080808082b082b08, 0x080808082b190819,
|
3405
|
-
0x080808082b191908, 0x080808082b192b19, 0x080808082b2b0808, 0x0808081908080819,
|
3406
|
-
0x0808081908081908, 0x080808190808192b, 0x0808081908082b19, 0x0808081908190808,
|
3407
|
-
0x080808190819082b, 0x0808081908191919, 0x0808081908192b08, 0x0808081908192b2b,
|
3408
|
-
0x08080819082b0819, 0x08080819082b1908, 0x0808081919080808, 0x080808191908082b,
|
3409
|
-
0x0808081919081919, 0x0808081919082b08, 0x0808081919190819, 0x0808081919191908,
|
3410
|
-
0x08080819192b0808, 0x08080819192b2b08, 0x080808192b080819, 0x080808192b081908,
|
3411
|
-
0x080808192b190808, 0x0808082b08080808, 0x0808082b0808082b, 0x0808082b08081919,
|
3412
|
-
0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908, 0x0808082b082b0808,
|
3413
|
-
0x0808082b19080819, 0x0808082b19081908, 0x0808082b19190808, 0x0808082b19191919,
|
3414
|
-
0x0808082b2b080808, 0x0808082b2b082b2b, 0x0808190808080819, 0x0808190808081908,
|
3415
|
-
0x080819080808192b, 0x0808190808082b19, 0x0808190808190808, 0x080819080819082b,
|
3416
|
-
0x0808190808191919, 0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908,
|
3417
|
-
0x0808190819080808, 0x080819081908082b, 0x0808190819081919, 0x0808190819082b08,
|
3418
|
-
0x0808190819190819, 0x0808190819191908, 0x080819081919192b, 0x08081908192b0808,
|
3419
|
-
0x080819082b080819, 0x080819082b081908, 0x080819082b190808, 0x0808191908080808,
|
3420
|
-
0x080819190808082b, 0x0808191908081919, 0x0808191908082b08, 0x0808191908190819,
|
3421
|
-
0x0808191908191908, 0x08081919082b0808, 0x0808191919080819, 0x0808191919081908,
|
3422
|
-
0x0808191919190808, 0x08081919192b0819, 0x080819192b080808, 0x0808192b08080819,
|
3423
|
-
0x0808192b08081908, 0x0808192b08190808, 0x0808192b082b192b, 0x0808192b19080808,
|
3424
|
-
0x0808192b1908082b, 0x0808192b2b081908, 0x08082b0808080808, 0x08082b080808082b,
|
3425
|
-
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808082b2b, 0x08082b0808190819,
|
3426
|
-
0x08082b0808191908, 0x08082b08082b0808, 0x08082b08082b1919, 0x08082b0819080819,
|
3427
|
-
0x08082b0819081908, 0x08082b0819190808, 0x08082b0819192b08, 0x08082b082b080808,
|
3428
|
-
0x08082b082b2b0808, 0x08082b082b2b2b2b, 0x08082b1908080819, 0x08082b1908081908,
|
3429
|
-
0x08082b1908190808, 0x08082b1919080808, 0x08082b192b080819, 0x08082b192b082b19,
|
3430
|
-
0x08082b2b08080808, 0x08082b2b082b0808, 0x08082b2b082b2b08, 0x08082b2b2b19192b,
|
3431
|
-
0x08082b2b2b2b0808, 0x0819080808080819, 0x0819080808081908, 0x081908080808192b,
|
3432
|
-
0x0819080808082b19, 0x0819080808190808, 0x081908080819082b, 0x0819080808191919,
|
3433
|
-
0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908, 0x0819080819080808,
|
3434
|
-
0x081908081908082b, 0x0819080819081919, 0x0819080819082b08, 0x0819080819190819,
|
3435
|
-
0x0819080819191908, 0x08190808192b0808, 0x08190808192b2b2b, 0x081908082b080819,
|
3436
|
-
0x081908082b081908, 0x081908082b190808, 0x0819081908080808, 0x081908190808082b,
|
3437
|
-
0x0819081908081919, 0x0819081908082b08, 0x0819081908190819, 0x0819081908191908,
|
3438
|
-
0x08190819082b0808, 0x0819081919080819, 0x0819081919081908, 0x0819081919190808,
|
3439
|
-
0x081908192b080808, 0x081908192b191908, 0x081908192b19192b, 0x0819082b08080819,
|
3440
|
-
0x0819082b08081908, 0x0819082b0808192b, 0x0819082b08190808, 0x0819082b19080808,
|
3441
|
-
0x0819082b192b0808, 0x0819190808080808, 0x081919080808082b, 0x0819190808081919,
|
3442
|
-
0x0819190808082b08, 0x0819190808190819, 0x0819190808191908, 0x08191908082b0808,
|
3443
|
-
0x0819190819080819, 0x0819190819081908, 0x0819190819082b19, 0x0819190819190808,
|
3444
|
-
0x08191908192b1908, 0x081919082b080808, 0x0819191908080819, 0x0819191908081908,
|
3445
|
-
0x0819191908190808, 0x0819191919080808, 0x0819192b08080808, 0x0819192b08191908,
|
3446
|
-
0x0819192b19082b19, 0x08192b0808080819, 0x08192b0808081908, 0x08192b0808190808,
|
3447
|
-
0x08192b080819082b, 0x08192b0819080808, 0x08192b0819191908, 0x08192b082b08192b,
|
3448
|
-
0x08192b1908080808, 0x08192b1908081919, 0x08192b19192b192b, 0x08192b2b19190819,
|
3449
|
-
0x08192b2b2b2b2b19, 0x082b080808080808, 0x082b08080808082b, 0x082b080808081919,
|
3450
|
-
0x082b080808082b08, 0x082b080808082b2b, 0x082b080808190819, 0x082b080808191908,
|
3451
|
-
0x082b0808082b0808, 0x082b080819080819, 0x082b080819081908, 0x082b080819190808,
|
3452
|
-
0x082b08082b080808, 0x082b08082b2b0808, 0x082b081908080819, 0x082b081908081908,
|
3453
|
-
0x082b081908190808, 0x082b081919080808, 0x082b081919082b08, 0x082b0819192b1919,
|
3454
|
-
0x082b082b08080808, 0x082b082b082b082b, 0x082b082b2b080808, 0x082b082b2b2b2b08,
|
3455
|
-
0x082b190808080819, 0x082b190808081908, 0x082b190808190808, 0x082b1908082b2b19,
|
3456
|
-
0x082b190819080808, 0x082b191908080808, 0x082b191919080819, 0x082b19191919082b,
|
3457
|
-
0x082b19192b192b19, 0x082b192b08080819, 0x082b192b08192b2b, 0x082b192b2b2b192b,
|
3458
|
-
0x082b2b0808080808, 0x082b2b0808082b08, 0x082b2b0808082b2b, 0x082b2b08082b0808,
|
3459
|
-
0x082b2b0819191919, 0x082b2b082b082b08, 0x082b2b082b2b082b, 0x082b2b19192b2b08,
|
3460
|
-
0x082b2b192b190808, 0x082b2b2b08082b08, 0x082b2b2b082b0808, 0x082b2b2b2b08082b,
|
3461
|
-
0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819, 0x1908080808081908,
|
3462
|
-
0x190808080808192b, 0x1908080808082b19, 0x1908080808190808, 0x190808080819082b,
|
3463
|
-
0x1908080808191919, 0x1908080808192b08, 0x19080808082b0819, 0x19080808082b1908,
|
3464
|
-
0x1908080819080808, 0x190808081908082b, 0x1908080819081919, 0x1908080819082b08,
|
3465
|
-
0x1908080819082b2b, 0x1908080819190819, 0x1908080819191908, 0x19080808192b0808,
|
3466
|
-
0x19080808192b1919, 0x190808082b080819, 0x190808082b081908, 0x190808082b190808,
|
3467
|
-
0x1908081908080808, 0x190808190808082b, 0x1908081908081919, 0x1908081908082b08,
|
3468
|
-
0x1908081908190819, 0x1908081908191908, 0x19080819082b0808, 0x1908081919080819,
|
3469
|
-
0x1908081919081908, 0x1908081919190808, 0x190808192b080808, 0x190808192b081919,
|
3470
|
-
0x190808192b2b082b, 0x1908082b08080819, 0x1908082b08081908, 0x1908082b08190808,
|
3471
|
-
0x1908082b0819082b, 0x1908082b082b2b19, 0x1908082b19080808, 0x1908190808080808,
|
3472
|
-
0x190819080808082b, 0x1908190808081919, 0x1908190808082b08, 0x1908190808190819,
|
3473
|
-
0x1908190808191908, 0x1908190808192b19, 0x19081908082b0808, 0x1908190819080819,
|
3474
|
-
0x1908190819081908, 0x1908190819190808, 0x190819082b080808, 0x190819082b191908,
|
3475
|
-
0x1908191908080819, 0x1908191908081908, 0x1908191908190808, 0x19081919082b1908,
|
3476
|
-
0x1908191919080808, 0x190819192b192b2b, 0x1908192b08080808, 0x1908192b08082b2b,
|
3477
|
-
0x1908192b19081908, 0x1908192b19190808, 0x19082b0808080819, 0x19082b0808081908,
|
3478
|
-
0x19082b0808190808, 0x19082b0819080808, 0x19082b0819081919, 0x19082b0819191908,
|
3479
|
-
0x19082b08192b082b, 0x19082b1908080808, 0x19082b1908190819, 0x19082b1919081908,
|
3480
|
-
0x19082b1919190808, 0x19082b19192b2b19, 0x19082b2b08081908, 0x1919080808080808,
|
3481
|
-
0x191908080808082b, 0x1919080808081919, 0x1919080808082b08, 0x1919080808190819,
|
3482
|
-
0x1919080808191908, 0x19190808082b0808, 0x19190808082b2b08, 0x1919080819080819,
|
3483
|
-
0x1919080819081908, 0x1919080819190808, 0x191908082b080808, 0x1919081908080819,
|
3484
|
-
0x1919081908081908, 0x1919081908190808, 0x1919081908191919, 0x1919081919080808,
|
3485
|
-
0x191908191908082b, 0x1919082b08080808, 0x1919082b19081908, 0x1919082b2b2b2b2b,
|
3486
|
-
0x1919190808080819, 0x1919190808081908, 0x1919190808190808, 0x19191908082b0819,
|
3487
|
-
0x1919190819080808, 0x19191908192b0808, 0x191919082b080819, 0x191919082b2b0819,
|
3488
|
-
0x1919191908080808, 0x1919191908082b08, 0x191919192b080808, 0x191919192b082b08,
|
3489
|
-
0x1919192b082b0819, 0x1919192b192b2b08, 0x1919192b2b2b0819, 0x19192b0808080808,
|
3490
|
-
0x19192b0808191908, 0x19192b0819080819, 0x19192b0819190808, 0x19192b082b192b19,
|
3491
|
-
0x19192b1908192b2b, 0x19192b1919080808, 0x19192b191908082b, 0x19192b2b2b081919,
|
3492
|
-
0x192b080808080819, 0x192b080808081908, 0x192b080808190808, 0x192b080819080808,
|
3493
|
-
0x192b080819191908, 0x192b0808192b082b, 0x192b08082b08192b, 0x192b08082b2b2b19,
|
3494
|
-
0x192b081908080808, 0x192b082b082b1908, 0x192b082b19082b2b, 0x192b082b2b19082b,
|
3495
|
-
0x192b190808080808, 0x192b19080819192b, 0x192b191908190808, 0x192b191919080808,
|
3496
|
-
0x192b191919081919, 0x192b19192b2b1908, 0x192b2b0808080819, 0x192b2b08192b2b2b,
|
3497
|
-
0x192b2b19082b1919, 0x192b2b2b0808192b, 0x192b2b2b19191908, 0x192b2b2b192b082b,
|
3498
|
-
0x2b08080808080808, 0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08,
|
3499
|
-
0x2b08080808190819, 0x2b08080808191908, 0x2b080808082b0808, 0x2b080808082b2b2b,
|
3500
|
-
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808082b080808,
|
3501
|
-
0x2b0808082b08082b, 0x2b0808082b2b2b08, 0x2b0808082b2b2b2b, 0x2b08081908080819,
|
3502
|
-
0x2b08081908081908, 0x2b0808190808192b, 0x2b08081908190808, 0x2b08081919080808,
|
3503
|
-
0x2b08081919190819, 0x2b08081919192b19, 0x2b08082b08080808, 0x2b08082b082b0808,
|
3504
|
-
0x2b08082b2b080808, 0x2b08082b2b08082b, 0x2b08082b2b2b0808, 0x2b08082b2b2b2b08,
|
3505
|
-
0x2b08190808080819, 0x2b08190808081908, 0x2b08190808190808, 0x2b0819080819082b,
|
3506
|
-
0x2b08190808191919, 0x2b08190819080808, 0x2b081908192b0808, 0x2b0819082b082b19,
|
3507
|
-
0x2b08191908080808, 0x2b08191919081908, 0x2b0819192b2b1919, 0x2b08192b08192b08,
|
3508
|
-
0x2b08192b192b2b2b, 0x2b082b0808080808, 0x2b082b0808082b08, 0x2b082b08082b1919,
|
3509
|
-
0x2b082b0819192b2b, 0x2b082b082b080808, 0x2b082b082b08082b, 0x2b082b082b2b2b08,
|
3510
|
-
0x2b082b190808192b, 0x2b082b2b082b082b, 0x2b082b2b2b080808, 0x2b082b2b2b082b08,
|
3511
|
-
0x2b082b2b2b19192b, 0x2b082b2b2b2b2b08, 0x2b19080808080819, 0x2b19080808081908,
|
3512
|
-
0x2b19080808190808, 0x2b19080819080808, 0x2b1908081919192b, 0x2b1908082b081908,
|
3513
|
-
0x2b19081908080808, 0x2b190819082b082b, 0x2b190819192b1908, 0x2b19082b1919192b,
|
3514
|
-
0x2b19082b2b082b19, 0x2b19190808080808, 0x2b19190808081919, 0x2b19190819081908,
|
3515
|
-
0x2b19190819190808, 0x2b19190819192b08, 0x2b191919082b2b19, 0x2b1919192b190808,
|
3516
|
-
0x2b1919192b19082b, 0x2b19192b19080819, 0x2b192b0819190819, 0x2b192b082b2b192b,
|
3517
|
-
0x2b192b1919082b19, 0x2b192b2b08191919, 0x2b192b2b192b0808, 0x2b2b080808080808,
|
3518
|
-
0x2b2b08080808082b, 0x2b2b080808082b08, 0x2b2b080808082b2b, 0x2b2b0808082b0808,
|
3519
|
-
0x2b2b0808082b2b2b, 0x2b2b08082b2b0808, 0x2b2b081919190819, 0x2b2b081919192b19,
|
3520
|
-
0x2b2b08192b2b192b, 0x2b2b082b08080808, 0x2b2b082b0808082b, 0x2b2b082b08082b08,
|
3521
|
-
0x2b2b082b082b2b2b, 0x2b2b082b2b080808, 0x2b2b082b2b2b0808, 0x2b2b190819080808,
|
3522
|
-
0x2b2b19082b191919, 0x2b2b192b192b1919, 0x2b2b192b2b192b08, 0x2b2b2b0808082b2b,
|
3523
|
-
0x2b2b2b08082b0808, 0x2b2b2b08082b082b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b0808,
|
3524
|
-
0x2b2b2b082b2b2b08, 0x2b2b2b1908081908, 0x2b2b2b192b081908, 0x2b2b2b192b08192b,
|
3525
|
-
0x2b2b2b2b082b2b08, 0x2b2b2b2b082b2b2b, 0x2b2b2b2b2b190819, 0x2b2b2b2b2b2b2b2b,
|
3526
|
-
};
|
3527
|
-
|
3528
|
-
static const uint64_t iq2s_grid[1024] = {
|
3529
|
-
0x0808080808080808, 0x080808080808082b, 0x0808080808081919, 0x0808080808082b08,
|
3530
|
-
0x0808080808082b2b, 0x0808080808190819, 0x0808080808191908, 0x080808080819192b,
|
3531
|
-
0x0808080808192b19, 0x08080808082b0808, 0x08080808082b082b, 0x08080808082b1919,
|
3532
|
-
0x08080808082b2b08, 0x0808080819080819, 0x0808080819081908, 0x080808081908192b,
|
3533
|
-
0x0808080819082b19, 0x0808080819190808, 0x080808081919082b, 0x0808080819191919,
|
3534
|
-
0x0808080819192b08, 0x08080808192b0819, 0x08080808192b1908, 0x08080808192b192b,
|
3535
|
-
0x08080808192b2b19, 0x080808082b080808, 0x080808082b08082b, 0x080808082b081919,
|
3536
|
-
0x080808082b082b08, 0x080808082b190819, 0x080808082b191908, 0x080808082b2b0808,
|
3537
|
-
0x080808082b2b1919, 0x080808082b2b2b2b, 0x0808081908080819, 0x0808081908081908,
|
3538
|
-
0x080808190808192b, 0x0808081908082b19, 0x0808081908190808, 0x080808190819082b,
|
3539
|
-
0x0808081908191919, 0x0808081908192b08, 0x08080819082b0819, 0x08080819082b1908,
|
3540
|
-
0x0808081919080808, 0x080808191908082b, 0x0808081919081919, 0x0808081919082b08,
|
3541
|
-
0x0808081919190819, 0x0808081919191908, 0x080808191919192b, 0x0808081919192b19,
|
3542
|
-
0x08080819192b0808, 0x08080819192b1919, 0x08080819192b2b08, 0x080808192b080819,
|
3543
|
-
0x080808192b081908, 0x080808192b190808, 0x080808192b19082b, 0x080808192b191919,
|
3544
|
-
0x080808192b2b0819, 0x080808192b2b1908, 0x0808082b08080808, 0x0808082b0808082b,
|
3545
|
-
0x0808082b08081919, 0x0808082b08082b08, 0x0808082b08190819, 0x0808082b08191908,
|
3546
|
-
0x0808082b082b0808, 0x0808082b082b2b2b, 0x0808082b19080819, 0x0808082b19081908,
|
3547
|
-
0x0808082b1908192b, 0x0808082b19082b19, 0x0808082b19190808, 0x0808082b19191919,
|
3548
|
-
0x0808082b2b080808, 0x0808082b2b081919, 0x0808082b2b082b2b, 0x0808082b2b191908,
|
3549
|
-
0x0808082b2b2b082b, 0x0808190808080819, 0x0808190808081908, 0x080819080808192b,
|
3550
|
-
0x0808190808082b19, 0x0808190808190808, 0x080819080819082b, 0x0808190808191919,
|
3551
|
-
0x0808190808192b08, 0x08081908082b0819, 0x08081908082b1908, 0x08081908082b192b,
|
3552
|
-
0x08081908082b2b19, 0x0808190819080808, 0x080819081908082b, 0x0808190819081919,
|
3553
|
-
0x0808190819082b08, 0x0808190819082b2b, 0x0808190819190819, 0x0808190819191908,
|
3554
|
-
0x080819081919192b, 0x0808190819192b19, 0x08081908192b0808, 0x08081908192b082b,
|
3555
|
-
0x08081908192b1919, 0x080819082b080819, 0x080819082b081908, 0x080819082b08192b,
|
3556
|
-
0x080819082b082b19, 0x080819082b190808, 0x080819082b191919, 0x080819082b192b08,
|
3557
|
-
0x080819082b2b0819, 0x080819082b2b1908, 0x0808191908080808, 0x080819190808082b,
|
3558
|
-
0x0808191908081919, 0x0808191908082b08, 0x0808191908082b2b, 0x0808191908190819,
|
3559
|
-
0x0808191908191908, 0x080819190819192b, 0x0808191908192b19, 0x08081919082b0808,
|
3560
|
-
0x08081919082b1919, 0x08081919082b2b08, 0x0808191919080819, 0x0808191919081908,
|
3561
|
-
0x080819191908192b, 0x0808191919082b19, 0x0808191919190808, 0x080819191919082b,
|
3562
|
-
0x0808191919191919, 0x0808191919192b08, 0x08081919192b0819, 0x08081919192b1908,
|
3563
|
-
0x080819192b080808, 0x080819192b08082b, 0x080819192b081919, 0x080819192b082b08,
|
3564
|
-
0x080819192b190819, 0x080819192b191908, 0x080819192b2b0808, 0x0808192b08080819,
|
3565
|
-
0x0808192b08081908, 0x0808192b0808192b, 0x0808192b08082b19, 0x0808192b08190808,
|
3566
|
-
0x0808192b08191919, 0x0808192b19080808, 0x0808192b19081919, 0x0808192b19082b08,
|
3567
|
-
0x0808192b19190819, 0x0808192b19191908, 0x0808192b192b0808, 0x0808192b2b080819,
|
3568
|
-
0x0808192b2b081908, 0x0808192b2b190808, 0x08082b0808080808, 0x08082b080808082b,
|
3569
|
-
0x08082b0808081919, 0x08082b0808082b08, 0x08082b0808190819, 0x08082b0808191908,
|
3570
|
-
0x08082b080819192b, 0x08082b0808192b19, 0x08082b08082b0808, 0x08082b08082b1919,
|
3571
|
-
0x08082b08082b2b2b, 0x08082b0819080819, 0x08082b0819081908, 0x08082b081908192b,
|
3572
|
-
0x08082b0819082b19, 0x08082b0819190808, 0x08082b081919082b, 0x08082b0819191919,
|
3573
|
-
0x08082b0819192b08, 0x08082b08192b0819, 0x08082b08192b1908, 0x08082b082b080808,
|
3574
|
-
0x08082b082b081919, 0x08082b082b191908, 0x08082b082b2b2b2b, 0x08082b1908080819,
|
3575
|
-
0x08082b1908081908, 0x08082b1908190808, 0x08082b190819082b, 0x08082b1908191919,
|
3576
|
-
0x08082b1908192b08, 0x08082b19082b0819, 0x08082b1919080808, 0x08082b1919081919,
|
3577
|
-
0x08082b1919082b08, 0x08082b1919190819, 0x08082b1919191908, 0x08082b19192b0808,
|
3578
|
-
0x08082b192b080819, 0x08082b192b190808, 0x08082b2b08080808, 0x08082b2b08190819,
|
3579
|
-
0x08082b2b08191908, 0x08082b2b082b082b, 0x08082b2b082b2b08, 0x08082b2b082b2b2b,
|
3580
|
-
0x08082b2b19190808, 0x08082b2b2b192b19, 0x0819080808080819, 0x0819080808081908,
|
3581
|
-
0x081908080808192b, 0x0819080808082b19, 0x0819080808190808, 0x081908080819082b,
|
3582
|
-
0x0819080808191919, 0x0819080808192b08, 0x08190808082b0819, 0x08190808082b1908,
|
3583
|
-
0x08190808082b192b, 0x0819080819080808, 0x081908081908082b, 0x0819080819081919,
|
3584
|
-
0x0819080819082b08, 0x0819080819190819, 0x0819080819191908, 0x081908081919192b,
|
3585
|
-
0x0819080819192b19, 0x08190808192b0808, 0x08190808192b082b, 0x08190808192b1919,
|
3586
|
-
0x08190808192b2b08, 0x081908082b080819, 0x081908082b081908, 0x081908082b08192b,
|
3587
|
-
0x081908082b190808, 0x081908082b191919, 0x081908082b192b08, 0x081908082b2b0819,
|
3588
|
-
0x081908082b2b1908, 0x0819081908080808, 0x081908190808082b, 0x0819081908081919,
|
3589
|
-
0x0819081908082b08, 0x0819081908082b2b, 0x0819081908190819, 0x0819081908191908,
|
3590
|
-
0x081908190819192b, 0x0819081908192b19, 0x08190819082b0808, 0x08190819082b082b,
|
3591
|
-
0x08190819082b1919, 0x08190819082b2b08, 0x0819081919080819, 0x0819081919081908,
|
3592
|
-
0x081908191908192b, 0x0819081919082b19, 0x0819081919190808, 0x081908191919082b,
|
3593
|
-
0x0819081919191919, 0x0819081919192b08, 0x08190819192b0819, 0x08190819192b1908,
|
3594
|
-
0x081908192b080808, 0x081908192b08082b, 0x081908192b081919, 0x081908192b082b08,
|
3595
|
-
0x081908192b190819, 0x081908192b191908, 0x0819082b08080819, 0x0819082b08081908,
|
3596
|
-
0x0819082b08082b19, 0x0819082b08190808, 0x0819082b08191919, 0x0819082b082b0819,
|
3597
|
-
0x0819082b082b1908, 0x0819082b19080808, 0x0819082b19081919, 0x0819082b19190819,
|
3598
|
-
0x0819082b19191908, 0x0819082b2b080819, 0x0819082b2b081908, 0x0819082b2b190808,
|
3599
|
-
0x0819190808080808, 0x081919080808082b, 0x0819190808081919, 0x0819190808082b08,
|
3600
|
-
0x0819190808190819, 0x0819190808191908, 0x081919080819192b, 0x0819190808192b19,
|
3601
|
-
0x08191908082b0808, 0x08191908082b1919, 0x08191908082b2b08, 0x0819190819080819,
|
3602
|
-
0x0819190819081908, 0x081919081908192b, 0x0819190819082b19, 0x0819190819190808,
|
3603
|
-
0x081919081919082b, 0x0819190819191919, 0x0819190819192b08, 0x08191908192b0819,
|
3604
|
-
0x08191908192b1908, 0x081919082b080808, 0x081919082b08082b, 0x081919082b081919,
|
3605
|
-
0x081919082b082b08, 0x081919082b190819, 0x081919082b191908, 0x081919082b2b0808,
|
3606
|
-
0x0819191908080819, 0x0819191908081908, 0x081919190808192b, 0x0819191908082b19,
|
3607
|
-
0x0819191908190808, 0x081919190819082b, 0x0819191908191919, 0x0819191908192b08,
|
3608
|
-
0x08191919082b0819, 0x08191919082b1908, 0x0819191919080808, 0x081919191908082b,
|
3609
|
-
0x0819191919081919, 0x0819191919082b08, 0x0819191919190819, 0x0819191919191908,
|
3610
|
-
0x08191919192b0808, 0x081919192b080819, 0x081919192b081908, 0x081919192b190808,
|
3611
|
-
0x0819192b08080808, 0x0819192b08081919, 0x0819192b08082b08, 0x0819192b08190819,
|
3612
|
-
0x0819192b08191908, 0x0819192b082b0808, 0x0819192b19080819, 0x0819192b19081908,
|
3613
|
-
0x0819192b19190808, 0x0819192b2b080808, 0x0819192b2b2b2b2b, 0x08192b0808080819,
|
3614
|
-
0x08192b0808081908, 0x08192b080808192b, 0x08192b0808082b19, 0x08192b0808190808,
|
3615
|
-
0x08192b0808191919, 0x08192b0808192b08, 0x08192b08082b0819, 0x08192b0819080808,
|
3616
|
-
0x08192b081908082b, 0x08192b0819081919, 0x08192b0819082b08, 0x08192b0819190819,
|
3617
|
-
0x08192b0819191908, 0x08192b08192b0808, 0x08192b082b080819, 0x08192b082b081908,
|
3618
|
-
0x08192b1908080808, 0x08192b190808082b, 0x08192b1908081919, 0x08192b1908082b08,
|
3619
|
-
0x08192b1908190819, 0x08192b1908191908, 0x08192b19082b0808, 0x08192b1919080819,
|
3620
|
-
0x08192b1919081908, 0x08192b1919190808, 0x08192b19192b2b19, 0x08192b192b2b082b,
|
3621
|
-
0x08192b2b08081908, 0x08192b2b08190808, 0x08192b2b19080808, 0x08192b2b1919192b,
|
3622
|
-
0x082b080808080808, 0x082b08080808082b, 0x082b080808081919, 0x082b080808082b08,
|
3623
|
-
0x082b080808190819, 0x082b080808191908, 0x082b08080819192b, 0x082b080808192b19,
|
3624
|
-
0x082b0808082b0808, 0x082b0808082b1919, 0x082b0808082b2b2b, 0x082b080819080819,
|
3625
|
-
0x082b080819081908, 0x082b080819190808, 0x082b08081919082b, 0x082b080819191919,
|
3626
|
-
0x082b0808192b1908, 0x082b08082b080808, 0x082b08082b082b2b, 0x082b08082b191908,
|
3627
|
-
0x082b08082b2b2b2b, 0x082b081908080819, 0x082b081908081908, 0x082b081908190808,
|
3628
|
-
0x082b08190819082b, 0x082b081908191919, 0x082b0819082b0819, 0x082b081919080808,
|
3629
|
-
0x082b08191908082b, 0x082b081919081919, 0x082b081919190819, 0x082b081919191908,
|
3630
|
-
0x082b0819192b0808, 0x082b08192b080819, 0x082b08192b081908, 0x082b08192b190808,
|
3631
|
-
0x082b082b08080808, 0x082b082b08082b2b, 0x082b082b082b082b, 0x082b082b082b2b08,
|
3632
|
-
0x082b082b082b2b2b, 0x082b082b19081908, 0x082b082b19190808, 0x082b082b2b082b08,
|
3633
|
-
0x082b082b2b082b2b, 0x082b082b2b2b2b08, 0x082b190808080819, 0x082b190808081908,
|
3634
|
-
0x082b19080808192b, 0x082b190808082b19, 0x082b190808190808, 0x082b190808191919,
|
3635
|
-
0x082b190808192b08, 0x082b1908082b0819, 0x082b1908082b1908, 0x082b190819080808,
|
3636
|
-
0x082b19081908082b, 0x082b190819081919, 0x082b190819082b08, 0x082b190819190819,
|
3637
|
-
0x082b190819191908, 0x082b1908192b0808, 0x082b19082b080819, 0x082b19082b081908,
|
3638
|
-
0x082b19082b190808, 0x082b191908080808, 0x082b191908081919, 0x082b191908082b08,
|
3639
|
-
0x082b191908190819, 0x082b191908191908, 0x082b1919082b0808, 0x082b191919080819,
|
3640
|
-
0x082b191919081908, 0x082b191919190808, 0x082b1919192b192b, 0x082b19192b080808,
|
3641
|
-
0x082b192b08080819, 0x082b192b08081908, 0x082b192b08190808, 0x082b192b19080808,
|
3642
|
-
0x082b192b19192b19, 0x082b2b0808080808, 0x082b2b0808081919, 0x082b2b0808190819,
|
3643
|
-
0x082b2b0808191908, 0x082b2b0819080819, 0x082b2b0819081908, 0x082b2b0819190808,
|
3644
|
-
0x082b2b082b082b2b, 0x082b2b082b2b2b2b, 0x082b2b1908080819, 0x082b2b1908081908,
|
3645
|
-
0x082b2b1908190808, 0x082b2b192b191919, 0x082b2b2b08082b2b, 0x082b2b2b082b082b,
|
3646
|
-
0x082b2b2b192b1908, 0x082b2b2b2b082b08, 0x082b2b2b2b082b2b, 0x1908080808080819,
|
3647
|
-
0x1908080808081908, 0x190808080808192b, 0x1908080808082b19, 0x1908080808190808,
|
3648
|
-
0x190808080819082b, 0x1908080808191919, 0x1908080808192b08, 0x1908080808192b2b,
|
3649
|
-
0x19080808082b0819, 0x19080808082b1908, 0x19080808082b192b, 0x1908080819080808,
|
3650
|
-
0x190808081908082b, 0x1908080819081919, 0x1908080819082b08, 0x1908080819082b2b,
|
3651
|
-
0x1908080819190819, 0x1908080819191908, 0x190808081919192b, 0x1908080819192b19,
|
3652
|
-
0x19080808192b0808, 0x19080808192b082b, 0x19080808192b1919, 0x190808082b080819,
|
3653
|
-
0x190808082b081908, 0x190808082b190808, 0x190808082b191919, 0x190808082b192b08,
|
3654
|
-
0x190808082b2b0819, 0x190808082b2b1908, 0x1908081908080808, 0x190808190808082b,
|
3655
|
-
0x1908081908081919, 0x1908081908082b08, 0x1908081908190819, 0x1908081908191908,
|
3656
|
-
0x190808190819192b, 0x1908081908192b19, 0x19080819082b0808, 0x19080819082b082b,
|
3657
|
-
0x19080819082b1919, 0x1908081919080819, 0x1908081919081908, 0x190808191908192b,
|
3658
|
-
0x1908081919082b19, 0x1908081919190808, 0x190808191919082b, 0x1908081919191919,
|
3659
|
-
0x1908081919192b08, 0x19080819192b0819, 0x19080819192b1908, 0x190808192b080808,
|
3660
|
-
0x190808192b08082b, 0x190808192b081919, 0x190808192b082b08, 0x190808192b190819,
|
3661
|
-
0x190808192b191908, 0x190808192b2b0808, 0x1908082b08080819, 0x1908082b08081908,
|
3662
|
-
0x1908082b08190808, 0x1908082b0819082b, 0x1908082b08191919, 0x1908082b08192b08,
|
3663
|
-
0x1908082b082b1908, 0x1908082b19080808, 0x1908082b19081919, 0x1908082b19082b08,
|
3664
|
-
0x1908082b19190819, 0x1908082b19191908, 0x1908082b192b0808, 0x1908082b2b080819,
|
3665
|
-
0x1908082b2b081908, 0x1908190808080808, 0x190819080808082b, 0x1908190808081919,
|
3666
|
-
0x1908190808082b08, 0x1908190808082b2b, 0x1908190808190819, 0x1908190808191908,
|
3667
|
-
0x190819080819192b, 0x1908190808192b19, 0x19081908082b0808, 0x19081908082b082b,
|
3668
|
-
0x19081908082b1919, 0x19081908082b2b08, 0x1908190819080819, 0x1908190819081908,
|
3669
|
-
0x190819081908192b, 0x1908190819082b19, 0x1908190819190808, 0x190819081919082b,
|
3670
|
-
0x1908190819191919, 0x1908190819192b08, 0x19081908192b0819, 0x19081908192b1908,
|
3671
|
-
0x190819082b080808, 0x190819082b08082b, 0x190819082b081919, 0x190819082b082b08,
|
3672
|
-
0x190819082b190819, 0x190819082b191908, 0x190819082b2b0808, 0x1908191908080819,
|
3673
|
-
0x1908191908081908, 0x190819190808192b, 0x1908191908082b19, 0x1908191908190808,
|
3674
|
-
0x190819190819082b, 0x1908191908191919, 0x1908191908192b08, 0x19081919082b0819,
|
3675
|
-
0x19081919082b1908, 0x1908191919080808, 0x190819191908082b, 0x1908191919081919,
|
3676
|
-
0x1908191919082b08, 0x1908191919190819, 0x1908191919191908, 0x19081919192b0808,
|
3677
|
-
0x19081919192b2b2b, 0x190819192b080819, 0x190819192b081908, 0x190819192b190808,
|
3678
|
-
0x1908192b08080808, 0x1908192b0808082b, 0x1908192b08081919, 0x1908192b08082b08,
|
3679
|
-
0x1908192b08190819, 0x1908192b08191908, 0x1908192b082b0808, 0x1908192b19080819,
|
3680
|
-
0x1908192b19081908, 0x1908192b19190808, 0x1908192b2b080808, 0x1908192b2b2b1919,
|
3681
|
-
0x19082b0808080819, 0x19082b0808081908, 0x19082b0808082b19, 0x19082b0808190808,
|
3682
|
-
0x19082b080819082b, 0x19082b0808191919, 0x19082b0808192b08, 0x19082b08082b0819,
|
3683
|
-
0x19082b08082b1908, 0x19082b0819080808, 0x19082b081908082b, 0x19082b0819081919,
|
3684
|
-
0x19082b0819082b08, 0x19082b0819190819, 0x19082b0819191908, 0x19082b08192b0808,
|
3685
|
-
0x19082b082b081908, 0x19082b082b190808, 0x19082b1908080808, 0x19082b190808082b,
|
3686
|
-
0x19082b1908081919, 0x19082b1908082b08, 0x19082b1908190819, 0x19082b1908191908,
|
3687
|
-
0x19082b19082b0808, 0x19082b1919080819, 0x19082b1919081908, 0x19082b1919190808,
|
3688
|
-
0x19082b192b080808, 0x19082b192b19192b, 0x19082b2b08080819, 0x19082b2b08081908,
|
3689
|
-
0x19082b2b08190808, 0x19082b2b19080808, 0x1919080808080808, 0x191908080808082b,
|
3690
|
-
0x1919080808081919, 0x1919080808082b08, 0x1919080808190819, 0x1919080808191908,
|
3691
|
-
0x191908080819192b, 0x1919080808192b19, 0x19190808082b0808, 0x19190808082b082b,
|
3692
|
-
0x19190808082b1919, 0x19190808082b2b08, 0x1919080819080819, 0x1919080819081908,
|
3693
|
-
0x191908081908192b, 0x1919080819082b19, 0x1919080819190808, 0x191908081919082b,
|
3694
|
-
0x1919080819191919, 0x1919080819192b08, 0x19190808192b0819, 0x19190808192b1908,
|
3695
|
-
0x191908082b080808, 0x191908082b08082b, 0x191908082b081919, 0x191908082b082b08,
|
3696
|
-
0x191908082b190819, 0x191908082b191908, 0x1919081908080819, 0x1919081908081908,
|
3697
|
-
0x191908190808192b, 0x1919081908082b19, 0x1919081908190808, 0x191908190819082b,
|
3698
|
-
0x1919081908191919, 0x1919081908192b08, 0x19190819082b0819, 0x19190819082b1908,
|
3699
|
-
0x1919081919080808, 0x191908191908082b, 0x1919081919081919, 0x1919081919082b08,
|
3700
|
-
0x1919081919190819, 0x1919081919191908, 0x19190819192b0808, 0x191908192b080819,
|
3701
|
-
0x191908192b081908, 0x191908192b190808, 0x1919082b08080808, 0x1919082b08081919,
|
3702
|
-
0x1919082b08082b08, 0x1919082b08190819, 0x1919082b08191908, 0x1919082b082b0808,
|
3703
|
-
0x1919082b19080819, 0x1919082b19081908, 0x1919082b19190808, 0x1919082b192b2b19,
|
3704
|
-
0x1919082b2b080808, 0x1919190808080819, 0x1919190808081908, 0x191919080808192b,
|
3705
|
-
0x1919190808082b19, 0x1919190808190808, 0x191919080819082b, 0x1919190808191919,
|
3706
|
-
0x1919190808192b08, 0x19191908082b0819, 0x19191908082b1908, 0x1919190819080808,
|
3707
|
-
0x191919081908082b, 0x1919190819081919, 0x1919190819082b08, 0x1919190819190819,
|
3708
|
-
0x1919190819191908, 0x19191908192b0808, 0x191919082b080819, 0x191919082b081908,
|
3709
|
-
0x191919082b190808, 0x1919191908080808, 0x191919190808082b, 0x1919191908081919,
|
3710
|
-
0x1919191908082b08, 0x1919191908190819, 0x1919191908191908, 0x19191919082b0808,
|
3711
|
-
0x1919191919080819, 0x1919191919081908, 0x1919191919190808, 0x191919192b080808,
|
3712
|
-
0x1919192b08080819, 0x1919192b08081908, 0x1919192b08190808, 0x1919192b082b192b,
|
3713
|
-
0x1919192b19080808, 0x19192b0808080808, 0x19192b080808082b, 0x19192b0808081919,
|
3714
|
-
0x19192b0808082b08, 0x19192b0808190819, 0x19192b0808191908, 0x19192b08082b0808,
|
3715
|
-
0x19192b0819080819, 0x19192b0819081908, 0x19192b0819190808, 0x19192b0819192b2b,
|
3716
|
-
0x19192b082b080808, 0x19192b1908080819, 0x19192b1908081908, 0x19192b1908190808,
|
3717
|
-
0x19192b1919080808, 0x19192b2b08080808, 0x19192b2b08192b19, 0x19192b2b2b081919,
|
3718
|
-
0x19192b2b2b2b2b08, 0x192b080808080819, 0x192b080808081908, 0x192b08080808192b,
|
3719
|
-
0x192b080808190808, 0x192b08080819082b, 0x192b080808191919, 0x192b080808192b08,
|
3720
|
-
0x192b0808082b0819, 0x192b0808082b1908, 0x192b080819080808, 0x192b080819081919,
|
3721
|
-
0x192b080819082b08, 0x192b080819190819, 0x192b080819191908, 0x192b0808192b0808,
|
3722
|
-
0x192b08082b081908, 0x192b08082b190808, 0x192b081908080808, 0x192b08190808082b,
|
3723
|
-
0x192b081908081919, 0x192b081908082b08, 0x192b081908190819, 0x192b081908191908,
|
3724
|
-
0x192b0819082b0808, 0x192b081919080819, 0x192b081919081908, 0x192b081919190808,
|
3725
|
-
0x192b08192b080808, 0x192b08192b192b19, 0x192b082b08081908, 0x192b082b08190808,
|
3726
|
-
0x192b082b19080808, 0x192b082b1919192b, 0x192b082b2b2b0819, 0x192b190808080808,
|
3727
|
-
0x192b190808081919, 0x192b190808082b08, 0x192b190808190819, 0x192b190808191908,
|
3728
|
-
0x192b1908082b0808, 0x192b190819080819, 0x192b190819081908, 0x192b190819190808,
|
3729
|
-
0x192b19082b080808, 0x192b191908080819, 0x192b191908081908, 0x192b191908190808,
|
3730
|
-
0x192b191919080808, 0x192b191919082b2b, 0x192b1919192b2b08, 0x192b19192b19082b,
|
3731
|
-
0x192b192b08080808, 0x192b192b2b191908, 0x192b2b0808080819, 0x192b2b0808081908,
|
3732
|
-
0x192b2b0808190808, 0x192b2b08192b1919, 0x192b2b082b192b08, 0x192b2b1908080808,
|
3733
|
-
0x192b2b19082b2b2b, 0x192b2b2b1908082b, 0x192b2b2b2b2b0819, 0x2b08080808080808,
|
3734
|
-
0x2b0808080808082b, 0x2b08080808081919, 0x2b08080808082b08, 0x2b08080808190819,
|
3735
|
-
0x2b08080808191908, 0x2b08080808192b19, 0x2b080808082b0808, 0x2b080808082b1919,
|
3736
|
-
0x2b08080819080819, 0x2b08080819081908, 0x2b08080819190808, 0x2b0808081919082b,
|
3737
|
-
0x2b08080819191919, 0x2b08080819192b08, 0x2b080808192b0819, 0x2b0808082b080808,
|
3738
|
-
0x2b0808082b081919, 0x2b0808082b190819, 0x2b0808082b191908, 0x2b08081908080819,
|
3739
|
-
0x2b08081908081908, 0x2b08081908082b19, 0x2b08081908190808, 0x2b0808190819082b,
|
3740
|
-
0x2b08081908191919, 0x2b08081908192b08, 0x2b080819082b0819, 0x2b080819082b1908,
|
3741
|
-
0x2b08081919080808, 0x2b0808191908082b, 0x2b08081919081919, 0x2b08081919082b08,
|
3742
|
-
0x2b08081919190819, 0x2b08081919191908, 0x2b0808192b080819, 0x2b0808192b081908,
|
3743
|
-
0x2b0808192b190808, 0x2b0808192b2b2b19, 0x2b08082b08080808, 0x2b08082b08081919,
|
3744
|
-
0x2b08082b08082b2b, 0x2b08082b08190819, 0x2b08082b08191908, 0x2b08082b19080819,
|
3745
|
-
0x2b08082b19081908, 0x2b08082b19190808, 0x2b08190808080819, 0x2b08190808081908,
|
3746
|
-
0x2b0819080808192b, 0x2b08190808082b19, 0x2b08190808190808, 0x2b0819080819082b,
|
3747
|
-
0x2b08190808191919, 0x2b08190808192b08, 0x2b081908082b0819, 0x2b08190819080808,
|
3748
|
-
0x2b0819081908082b, 0x2b08190819081919, 0x2b08190819082b08, 0x2b08190819190819,
|
3749
|
-
0x2b08190819191908, 0x2b081908192b0808, 0x2b0819082b080819, 0x2b0819082b081908,
|
3750
|
-
0x2b0819082b190808, 0x2b08191908080808, 0x2b0819190808082b, 0x2b08191908081919,
|
3751
|
-
0x2b08191908082b08, 0x2b08191908190819, 0x2b08191908191908, 0x2b081919082b0808,
|
3752
|
-
0x2b08191919080819, 0x2b08191919081908, 0x2b08191919190808, 0x2b0819192b080808,
|
3753
|
-
0x2b0819192b082b2b, 0x2b08192b08080819, 0x2b08192b08081908, 0x2b08192b08190808,
|
3754
|
-
0x2b08192b082b2b19, 0x2b08192b19080808, 0x2b082b0808080808, 0x2b082b0808081919,
|
3755
|
-
0x2b082b0808190819, 0x2b082b0808191908, 0x2b082b0819080819, 0x2b082b0819081908,
|
3756
|
-
0x2b082b0819190808, 0x2b082b082b2b082b, 0x2b082b1908080819, 0x2b082b1908081908,
|
3757
|
-
0x2b082b1919080808, 0x2b082b19192b1919, 0x2b082b2b082b082b, 0x2b082b2b19192b08,
|
3758
|
-
0x2b082b2b19192b2b, 0x2b082b2b2b08082b, 0x2b082b2b2b2b082b, 0x2b19080808080819,
|
3759
|
-
0x2b19080808081908, 0x2b19080808082b19, 0x2b19080808190808, 0x2b1908080819082b,
|
3760
|
-
0x2b19080808191919, 0x2b19080808192b08, 0x2b190808082b1908, 0x2b19080819080808,
|
3761
|
-
0x2b1908081908082b, 0x2b19080819081919, 0x2b19080819082b08, 0x2b19080819190819,
|
3762
|
-
0x2b19080819191908, 0x2b190808192b0808, 0x2b1908082b080819, 0x2b1908082b081908,
|
3763
|
-
0x2b1908082b190808, 0x2b19081908080808, 0x2b19081908081919, 0x2b19081908190819,
|
3764
|
-
0x2b19081908191908, 0x2b19081919080819, 0x2b19081919081908, 0x2b19081919190808,
|
3765
|
-
0x2b19081919192b2b, 0x2b19082b08080819, 0x2b19082b08081908, 0x2b19082b08190808,
|
3766
|
-
0x2b19082b19080808, 0x2b19082b2b2b192b, 0x2b19190808080808, 0x2b1919080808082b,
|
3767
|
-
0x2b19190808081919, 0x2b19190808082b08, 0x2b19190808190819, 0x2b19190808191908,
|
3768
|
-
0x2b191908082b0808, 0x2b19190819080819, 0x2b19190819081908, 0x2b19190819190808,
|
3769
|
-
0x2b1919082b080808, 0x2b1919082b19192b, 0x2b19191908080819, 0x2b19191908081908,
|
3770
|
-
0x2b19191908190808, 0x2b19191919080808, 0x2b1919192b192b08, 0x2b1919192b2b0819,
|
3771
|
-
0x2b19192b08080808, 0x2b19192b1908192b, 0x2b19192b192b1908, 0x2b192b0808080819,
|
3772
|
-
0x2b192b0808081908, 0x2b192b0808190808, 0x2b192b08082b192b, 0x2b192b0819080808,
|
3773
|
-
0x2b192b082b2b2b19, 0x2b192b1908080808, 0x2b192b1919082b19, 0x2b192b191919082b,
|
3774
|
-
0x2b192b2b2b190808, 0x2b2b080808080808, 0x2b2b080808081919, 0x2b2b080808082b2b,
|
3775
|
-
0x2b2b080808191908, 0x2b2b0808082b082b, 0x2b2b0808082b2b2b, 0x2b2b080819080819,
|
3776
|
-
0x2b2b080819081908, 0x2b2b080819190808, 0x2b2b08082b2b082b, 0x2b2b08082b2b2b2b,
|
3777
|
-
0x2b2b081919080808, 0x2b2b0819192b1919, 0x2b2b082b0808082b, 0x2b2b082b08082b2b,
|
3778
|
-
0x2b2b082b082b082b, 0x2b2b082b082b2b08, 0x2b2b082b082b2b2b, 0x2b2b082b2b08082b,
|
3779
|
-
0x2b2b082b2b082b08, 0x2b2b082b2b082b2b, 0x2b2b082b2b2b2b08, 0x2b2b190808080819,
|
3780
|
-
0x2b2b190808081908, 0x2b2b190808190808, 0x2b2b190819080808, 0x2b2b19082b082b19,
|
3781
|
-
0x2b2b19082b2b1908, 0x2b2b191908080808, 0x2b2b191908192b19, 0x2b2b192b19190819,
|
3782
|
-
0x2b2b2b0808082b2b, 0x2b2b2b08082b2b08, 0x2b2b2b082b2b082b, 0x2b2b2b1919191908,
|
3783
|
-
0x2b2b2b192b08192b, 0x2b2b2b2b08082b08, 0x2b2b2b2b08082b2b, 0x2b2b2b2b082b0808,
|
3784
|
-
0x2b2b2b2b082b082b, 0x2b2b2b2b082b2b08, 0x2b2b2b2b2b082b08, 0x2b2b2b2b2b2b2b2b,
|
3785
|
-
};
|
3786
|
-
|
3787
|
-
static const uint32_t iq3xxs_grid[256] = {
|
3788
|
-
0x04040404, 0x04040414, 0x04040424, 0x04040c0c, 0x04040c1c, 0x04040c3e, 0x04041404, 0x04041414,
|
3789
|
-
0x04041c0c, 0x04042414, 0x04043e1c, 0x04043e2c, 0x040c040c, 0x040c041c, 0x040c0c04, 0x040c0c14,
|
3790
|
-
0x040c140c, 0x040c142c, 0x040c1c04, 0x040c1c14, 0x040c240c, 0x040c2c24, 0x040c3e04, 0x04140404,
|
3791
|
-
0x04140414, 0x04140424, 0x04140c0c, 0x04141404, 0x04141414, 0x04141c0c, 0x04141c1c, 0x04141c3e,
|
3792
|
-
0x04142c0c, 0x04142c3e, 0x04143e2c, 0x041c040c, 0x041c043e, 0x041c0c04, 0x041c0c14, 0x041c142c,
|
3793
|
-
0x041c3e04, 0x04240c1c, 0x04241c3e, 0x04242424, 0x04242c3e, 0x04243e1c, 0x04243e2c, 0x042c040c,
|
3794
|
-
0x042c043e, 0x042c1c14, 0x042c2c14, 0x04341c2c, 0x04343424, 0x043e0c04, 0x043e0c24, 0x043e0c34,
|
3795
|
-
0x043e241c, 0x043e340c, 0x0c04040c, 0x0c04041c, 0x0c040c04, 0x0c040c14, 0x0c04140c, 0x0c04141c,
|
3796
|
-
0x0c041c04, 0x0c041c14, 0x0c041c24, 0x0c04243e, 0x0c042c04, 0x0c0c0404, 0x0c0c0414, 0x0c0c0c0c,
|
3797
|
-
0x0c0c1404, 0x0c0c1414, 0x0c14040c, 0x0c14041c, 0x0c140c04, 0x0c140c14, 0x0c14140c, 0x0c141c04,
|
3798
|
-
0x0c143e14, 0x0c1c0404, 0x0c1c0414, 0x0c1c1404, 0x0c1c1c0c, 0x0c1c2434, 0x0c1c3434, 0x0c24040c,
|
3799
|
-
0x0c24042c, 0x0c242c04, 0x0c2c1404, 0x0c2c1424, 0x0c2c2434, 0x0c2c3e0c, 0x0c34042c, 0x0c3e1414,
|
3800
|
-
0x0c3e2404, 0x14040404, 0x14040414, 0x14040c0c, 0x14040c1c, 0x14041404, 0x14041414, 0x14041434,
|
3801
|
-
0x14041c0c, 0x14042414, 0x140c040c, 0x140c041c, 0x140c042c, 0x140c0c04, 0x140c0c14, 0x140c140c,
|
3802
|
-
0x140c1c04, 0x140c341c, 0x140c343e, 0x140c3e04, 0x14140404, 0x14140414, 0x14140c0c, 0x14140c3e,
|
3803
|
-
0x14141404, 0x14141414, 0x14141c3e, 0x14142404, 0x14142c2c, 0x141c040c, 0x141c0c04, 0x141c0c24,
|
3804
|
-
0x141c3e04, 0x141c3e24, 0x14241c2c, 0x14242c1c, 0x142c041c, 0x142c143e, 0x142c240c, 0x142c3e24,
|
3805
|
-
0x143e040c, 0x143e041c, 0x143e0c34, 0x143e242c, 0x1c04040c, 0x1c040c04, 0x1c040c14, 0x1c04140c,
|
3806
|
-
0x1c04141c, 0x1c042c04, 0x1c04342c, 0x1c043e14, 0x1c0c0404, 0x1c0c0414, 0x1c0c1404, 0x1c0c1c0c,
|
3807
|
-
0x1c0c2424, 0x1c0c2434, 0x1c14040c, 0x1c14041c, 0x1c140c04, 0x1c14142c, 0x1c142c14, 0x1c143e14,
|
3808
|
-
0x1c1c0c0c, 0x1c1c1c1c, 0x1c241c04, 0x1c24243e, 0x1c243e14, 0x1c2c0404, 0x1c2c0434, 0x1c2c1414,
|
3809
|
-
0x1c2c2c2c, 0x1c340c24, 0x1c341c34, 0x1c34341c, 0x1c3e1c1c, 0x1c3e3404, 0x24040424, 0x24040c3e,
|
3810
|
-
0x24041c2c, 0x24041c3e, 0x24042c1c, 0x24042c3e, 0x240c3e24, 0x24141404, 0x24141c3e, 0x24142404,
|
3811
|
-
0x24143404, 0x24143434, 0x241c043e, 0x241c242c, 0x24240424, 0x24242c0c, 0x24243424, 0x242c142c,
|
3812
|
-
0x242c241c, 0x242c3e04, 0x243e042c, 0x243e0c04, 0x243e0c14, 0x243e1c04, 0x2c040c14, 0x2c04240c,
|
3813
|
-
0x2c043e04, 0x2c0c0404, 0x2c0c0434, 0x2c0c1434, 0x2c0c2c2c, 0x2c140c24, 0x2c141c14, 0x2c143e14,
|
3814
|
-
0x2c1c0414, 0x2c1c2c1c, 0x2c240c04, 0x2c24141c, 0x2c24143e, 0x2c243e14, 0x2c2c0414, 0x2c2c1c0c,
|
3815
|
-
0x2c342c04, 0x2c3e1424, 0x2c3e2414, 0x34041424, 0x34042424, 0x34042434, 0x34043424, 0x340c140c,
|
3816
|
-
0x340c340c, 0x34140c3e, 0x34143424, 0x341c1c04, 0x341c1c34, 0x34242424, 0x342c042c, 0x342c2c14,
|
3817
|
-
0x34341c1c, 0x343e041c, 0x343e140c, 0x3e04041c, 0x3e04042c, 0x3e04043e, 0x3e040c04, 0x3e041c14,
|
3818
|
-
0x3e042c14, 0x3e0c1434, 0x3e0c2404, 0x3e140c14, 0x3e14242c, 0x3e142c14, 0x3e1c0404, 0x3e1c0c2c,
|
3819
|
-
0x3e1c1c1c, 0x3e1c3404, 0x3e24140c, 0x3e24240c, 0x3e2c0404, 0x3e2c0414, 0x3e2c1424, 0x3e341c04,
|
3820
|
-
};
|
3821
|
-
|
3822
|
-
static const uint32_t iq3s_grid[512] = {
|
3823
|
-
0x01010101, 0x01010103, 0x01010105, 0x0101010b, 0x0101010f, 0x01010301, 0x01010303, 0x01010305,
|
3824
|
-
0x01010309, 0x0101030d, 0x01010501, 0x01010503, 0x0101050b, 0x01010707, 0x01010901, 0x01010905,
|
3825
|
-
0x0101090b, 0x0101090f, 0x01010b03, 0x01010b07, 0x01010d01, 0x01010d05, 0x01010f03, 0x01010f09,
|
3826
|
-
0x01010f0f, 0x01030101, 0x01030103, 0x01030105, 0x01030109, 0x01030301, 0x01030303, 0x0103030b,
|
3827
|
-
0x01030501, 0x01030507, 0x0103050f, 0x01030703, 0x0103070b, 0x01030909, 0x01030d03, 0x01030d0b,
|
3828
|
-
0x01030f05, 0x01050101, 0x01050103, 0x0105010b, 0x0105010f, 0x01050301, 0x01050307, 0x0105030d,
|
3829
|
-
0x01050503, 0x0105050b, 0x01050701, 0x01050709, 0x01050905, 0x0105090b, 0x0105090f, 0x01050b03,
|
3830
|
-
0x01050b07, 0x01050f01, 0x01050f07, 0x01070107, 0x01070303, 0x0107030b, 0x01070501, 0x01070505,
|
3831
|
-
0x01070703, 0x01070707, 0x0107070d, 0x01070909, 0x01070b01, 0x01070b05, 0x01070d0f, 0x01070f03,
|
3832
|
-
0x01070f0b, 0x01090101, 0x01090307, 0x0109030f, 0x01090503, 0x01090509, 0x01090705, 0x01090901,
|
3833
|
-
0x01090907, 0x01090b03, 0x01090f01, 0x010b0105, 0x010b0109, 0x010b0501, 0x010b0505, 0x010b050d,
|
3834
|
-
0x010b0707, 0x010b0903, 0x010b090b, 0x010b090f, 0x010b0d0d, 0x010b0f07, 0x010d010d, 0x010d0303,
|
3835
|
-
0x010d0307, 0x010d0703, 0x010d0b05, 0x010d0f03, 0x010f0101, 0x010f0105, 0x010f0109, 0x010f0501,
|
3836
|
-
0x010f0505, 0x010f050d, 0x010f0707, 0x010f0b01, 0x010f0b09, 0x03010101, 0x03010103, 0x03010105,
|
3837
|
-
0x03010109, 0x03010301, 0x03010303, 0x03010307, 0x0301030b, 0x0301030f, 0x03010501, 0x03010505,
|
3838
|
-
0x03010703, 0x03010709, 0x0301070d, 0x03010b09, 0x03010b0d, 0x03010d03, 0x03010f05, 0x03030101,
|
3839
|
-
0x03030103, 0x03030107, 0x0303010d, 0x03030301, 0x03030309, 0x03030503, 0x03030701, 0x03030707,
|
3840
|
-
0x03030903, 0x03030b01, 0x03030b05, 0x03030f01, 0x03030f0d, 0x03050101, 0x03050305, 0x0305030b,
|
3841
|
-
0x0305030f, 0x03050501, 0x03050509, 0x03050705, 0x03050901, 0x03050907, 0x03050b0b, 0x03050d01,
|
3842
|
-
0x03050f05, 0x03070103, 0x03070109, 0x0307010f, 0x03070301, 0x03070307, 0x03070503, 0x0307050f,
|
3843
|
-
0x03070701, 0x03070709, 0x03070903, 0x03070d05, 0x03070f01, 0x03090107, 0x0309010b, 0x03090305,
|
3844
|
-
0x03090309, 0x03090703, 0x03090707, 0x03090905, 0x0309090d, 0x03090b01, 0x03090b09, 0x030b0103,
|
3845
|
-
0x030b0301, 0x030b0307, 0x030b0503, 0x030b0701, 0x030b0705, 0x030b0b03, 0x030d0501, 0x030d0509,
|
3846
|
-
0x030d050f, 0x030d0909, 0x030d090d, 0x030f0103, 0x030f0107, 0x030f0301, 0x030f0305, 0x030f0503,
|
3847
|
-
0x030f070b, 0x030f0903, 0x030f0d05, 0x030f0f01, 0x05010101, 0x05010103, 0x05010107, 0x0501010b,
|
3848
|
-
0x0501010f, 0x05010301, 0x05010305, 0x05010309, 0x0501030d, 0x05010503, 0x05010507, 0x0501050f,
|
3849
|
-
0x05010701, 0x05010705, 0x05010903, 0x05010907, 0x0501090b, 0x05010b01, 0x05010b05, 0x05010d0f,
|
3850
|
-
0x05010f01, 0x05010f07, 0x05010f0b, 0x05030101, 0x05030105, 0x05030301, 0x05030307, 0x0503030f,
|
3851
|
-
0x05030505, 0x0503050b, 0x05030703, 0x05030709, 0x05030905, 0x05030b03, 0x05050103, 0x05050109,
|
3852
|
-
0x0505010f, 0x05050503, 0x05050507, 0x05050701, 0x0505070f, 0x05050903, 0x05050b07, 0x05050b0f,
|
3853
|
-
0x05050f03, 0x05050f09, 0x05070101, 0x05070105, 0x0507010b, 0x05070303, 0x05070505, 0x05070509,
|
3854
|
-
0x05070703, 0x05070707, 0x05070905, 0x05070b01, 0x05070d0d, 0x05090103, 0x0509010f, 0x05090501,
|
3855
|
-
0x05090507, 0x05090705, 0x0509070b, 0x05090903, 0x05090f05, 0x05090f0b, 0x050b0109, 0x050b0303,
|
3856
|
-
0x050b0505, 0x050b070f, 0x050b0901, 0x050b0b07, 0x050b0f01, 0x050d0101, 0x050d0105, 0x050d010f,
|
3857
|
-
0x050d0503, 0x050d0b0b, 0x050d0d03, 0x050f010b, 0x050f0303, 0x050f050d, 0x050f0701, 0x050f0907,
|
3858
|
-
0x050f0b01, 0x07010105, 0x07010303, 0x07010307, 0x0701030b, 0x0701030f, 0x07010505, 0x07010703,
|
3859
|
-
0x07010707, 0x0701070b, 0x07010905, 0x07010909, 0x0701090f, 0x07010b03, 0x07010d07, 0x07010f03,
|
3860
|
-
0x07030103, 0x07030107, 0x0703010b, 0x07030309, 0x07030503, 0x07030507, 0x07030901, 0x07030d01,
|
3861
|
-
0x07030f05, 0x07030f0d, 0x07050101, 0x07050305, 0x07050501, 0x07050705, 0x07050709, 0x07050b01,
|
3862
|
-
0x07070103, 0x07070301, 0x07070309, 0x07070503, 0x07070507, 0x0707050f, 0x07070701, 0x07070903,
|
3863
|
-
0x07070907, 0x0707090f, 0x07070b0b, 0x07070f07, 0x07090107, 0x07090303, 0x0709030d, 0x07090505,
|
3864
|
-
0x07090703, 0x07090b05, 0x07090d01, 0x07090d09, 0x070b0103, 0x070b0301, 0x070b0305, 0x070b050b,
|
3865
|
-
0x070b0705, 0x070b0909, 0x070b0b0d, 0x070b0f07, 0x070d030d, 0x070d0903, 0x070f0103, 0x070f0107,
|
3866
|
-
0x070f0501, 0x070f0505, 0x070f070b, 0x09010101, 0x09010109, 0x09010305, 0x09010501, 0x09010509,
|
3867
|
-
0x0901050f, 0x09010705, 0x09010903, 0x09010b01, 0x09010f01, 0x09030105, 0x0903010f, 0x09030303,
|
3868
|
-
0x09030307, 0x09030505, 0x09030701, 0x0903070b, 0x09030907, 0x09030b03, 0x09030b0b, 0x09050103,
|
3869
|
-
0x09050107, 0x09050301, 0x0905030b, 0x09050503, 0x09050707, 0x09050901, 0x09050b0f, 0x09050d05,
|
3870
|
-
0x09050f01, 0x09070109, 0x09070303, 0x09070307, 0x09070501, 0x09070505, 0x09070703, 0x0907070b,
|
3871
|
-
0x09090101, 0x09090105, 0x09090509, 0x0909070f, 0x09090901, 0x09090f03, 0x090b010b, 0x090b010f,
|
3872
|
-
0x090b0503, 0x090b0d05, 0x090d0307, 0x090d0709, 0x090d0d01, 0x090f0301, 0x090f030b, 0x090f0701,
|
3873
|
-
0x090f0907, 0x090f0b03, 0x0b010105, 0x0b010301, 0x0b010309, 0x0b010505, 0x0b010901, 0x0b010909,
|
3874
|
-
0x0b01090f, 0x0b010b05, 0x0b010d0d, 0x0b010f09, 0x0b030103, 0x0b030107, 0x0b03010b, 0x0b030305,
|
3875
|
-
0x0b030503, 0x0b030705, 0x0b030f05, 0x0b050101, 0x0b050303, 0x0b050507, 0x0b050701, 0x0b05070d,
|
3876
|
-
0x0b050b07, 0x0b070105, 0x0b07010f, 0x0b070301, 0x0b07050f, 0x0b070909, 0x0b070b03, 0x0b070d0b,
|
3877
|
-
0x0b070f07, 0x0b090103, 0x0b090109, 0x0b090501, 0x0b090705, 0x0b09090d, 0x0b0b0305, 0x0b0b050d,
|
3878
|
-
0x0b0b0b03, 0x0b0b0b07, 0x0b0d0905, 0x0b0f0105, 0x0b0f0109, 0x0b0f0505, 0x0d010303, 0x0d010307,
|
3879
|
-
0x0d01030b, 0x0d010703, 0x0d010707, 0x0d010d01, 0x0d030101, 0x0d030501, 0x0d03050f, 0x0d030d09,
|
3880
|
-
0x0d050305, 0x0d050709, 0x0d050905, 0x0d050b0b, 0x0d050d05, 0x0d050f01, 0x0d070101, 0x0d070309,
|
3881
|
-
0x0d070503, 0x0d070901, 0x0d09050b, 0x0d090907, 0x0d090d05, 0x0d0b0101, 0x0d0b0107, 0x0d0b0709,
|
3882
|
-
0x0d0b0d01, 0x0d0d010b, 0x0d0d0901, 0x0d0f0303, 0x0d0f0307, 0x0f010101, 0x0f010109, 0x0f01010f,
|
3883
|
-
0x0f010501, 0x0f010505, 0x0f01070d, 0x0f010901, 0x0f010b09, 0x0f010d05, 0x0f030105, 0x0f030303,
|
3884
|
-
0x0f030509, 0x0f030907, 0x0f03090b, 0x0f050103, 0x0f050109, 0x0f050301, 0x0f05030d, 0x0f050503,
|
3885
|
-
0x0f050701, 0x0f050b03, 0x0f070105, 0x0f070705, 0x0f07070b, 0x0f070b07, 0x0f090103, 0x0f09010b,
|
3886
|
-
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
|
3887
|
-
};
|
3888
|
-
|
3889
|
-
#define NGRID_IQ2XXS 512
|
3890
|
-
static const uint64_t iq1s_grid[NGRID_IQ2XXS] = {
|
3891
|
-
0xffffffffffff0101, 0xffffffffff01ff00, 0xffffffffff010100, 0xffffffff00000000,
|
3892
|
-
0xffffffff01ff00ff, 0xffffffff01ff0001, 0xffffffff0101ffff, 0xffffffff0101ff01,
|
3893
|
-
0xffffff00ff000000, 0xffffff000000ff00, 0xffffff00000000ff, 0xffffff0000000100,
|
3894
|
-
0xffffff0000010000, 0xffffff0001000000, 0xffffff01ffff00ff, 0xffffff01ff01ff00,
|
3895
|
-
0xffffff01ff010100, 0xffffff0100000001, 0xffffff0101ffff00, 0xffffff0101ff0101,
|
3896
|
-
0xffffff0101010100, 0xffff00ffff00ff01, 0xffff00ffff0000ff, 0xffff00ff00ff0100,
|
3897
|
-
0xffff00ff0100ff00, 0xffff00ff010001ff, 0xffff0000ff0101ff, 0xffff000000ffff00,
|
3898
|
-
0xffff000000000000, 0xffff00000001ff01, 0xffff000001000101, 0xffff0000010100ff,
|
3899
|
-
0xffff0001ffff0100, 0xffff00010000ff00, 0xffff000100010101, 0xffff000101000000,
|
3900
|
-
0xffff01ffffff0000, 0xffff01ffff01ffff, 0xffff01ffff010100, 0xffff01ff00000000,
|
3901
|
-
0xffff01ff01ffffff, 0xffff01ff01ff0001, 0xffff01ff0101ffff, 0xffff01ff01010001,
|
3902
|
-
0xffff0100ffffff01, 0xffff01000000ffff, 0xffff010000000100, 0xffff010001ff01ff,
|
3903
|
-
0xffff010001000000, 0xffff0101ff000000, 0xffff0101000101ff, 0xffff010101ffff01,
|
3904
|
-
0xffff01010101ff00, 0xff00ffffff000000, 0xff00ffff00ffff00, 0xff00ffff00000001,
|
3905
|
-
0xff00ffff000001ff, 0xff00ffff01010000, 0xff00ff00ffff0000, 0xff00ff00ff00ff00,
|
3906
|
-
0xff00ff00ff0000ff, 0xff00ff00ff000100, 0xff00ff00ff010001, 0xff00ff0000ff0001,
|
3907
|
-
0xff00ff000000ffff, 0xff00ff0000000000, 0xff00ff000001ff00, 0xff00ff0000010100,
|
3908
|
-
0xff00ff0001ff0000, 0xff00ff000100ff00, 0xff00ff0001000100, 0xff00ff01ff000000,
|
3909
|
-
0xff00ff0100ff0000, 0xff00ff01000001ff, 0xff00ff0101010001, 0xff0000ff00000000,
|
3910
|
-
0xff0000ff0001ff00, 0xff0000ff00010100, 0xff000000ffff0101, 0xff000000ff000000,
|
3911
|
-
0xff000000ff01ff00, 0xff00000000ff0000, 0xff0000000000ff00, 0xff000000000000ff,
|
3912
|
-
0xff00000000000000, 0xff00000000000001, 0xff00000000000100, 0xff0000000001ffff,
|
3913
|
-
0xff00000000010000, 0xff00000001000000, 0xff00000001010100, 0xff000001ff00ff01,
|
3914
|
-
0xff000001ff0100ff, 0xff00000100000000, 0xff0000010001ff00, 0xff00000101ff0100,
|
3915
|
-
0xff0000010100ff00, 0xff0001ff00ff00ff, 0xff0001ff00000101, 0xff0001ff000100ff,
|
3916
|
-
0xff0001ff01000000, 0xff000100ff0001ff, 0xff0001000000ff01, 0xff00010000000000,
|
3917
|
-
0xff00010000010001, 0xff00010000010100, 0xff00010001ffff00, 0xff00010001ff0101,
|
3918
|
-
0xff00010001010000, 0xff000101ffffffff, 0xff000101ff000101, 0xff00010101ff00ff,
|
3919
|
-
0xff00010101000001, 0xff000101010100ff, 0xff01ffffff000101, 0xff01ffffff01ffff,
|
3920
|
-
0xff01ffffff01ff01, 0xff01ffffff0101ff, 0xff01ffff00000000, 0xff01ffff01ff0001,
|
3921
|
-
0xff01ffff0101ff01, 0xff01ff00ff000000, 0xff01ff0000ff0100, 0xff01ff000000ff01,
|
3922
|
-
0xff01ff0000010000, 0xff01ff00010000ff, 0xff01ff01ff01ff00, 0xff01ff0100000101,
|
3923
|
-
0xff0100ffffff0000, 0xff0100ffff010000, 0xff0100ff01ff00ff, 0xff0100ff01000100,
|
3924
|
-
0xff0100ff010100ff, 0xff010000ffffff01, 0xff01000000000000, 0xff0100000101ff00,
|
3925
|
-
0xff010001ffff00ff, 0xff010001ff000100, 0xff01000100ffff00, 0xff01000100010001,
|
3926
|
-
0xff01000101ff0001, 0xff010001010001ff, 0xff0101ffffffffff, 0xff0101ffff01ffff,
|
3927
|
-
0xff0101ffff010101, 0xff0101ff0000ff00, 0xff0101ff01010001, 0xff010100ff000000,
|
3928
|
-
0xff010100ff01ff01, 0xff01010000ff0001, 0xff01010000000100, 0xff01010001000000,
|
3929
|
-
0xff0101010100ffff, 0x00ffffff0000ff01, 0x00ffffff000000ff, 0x00ffffff00000100,
|
3930
|
-
0x00ffffff00010000, 0x00ffff00ffff0001, 0x00ffff00ff0000ff, 0x00ffff00ff000100,
|
3931
|
-
0x00ffff0000000000, 0x00ffff0001000100, 0x00ffff0001010001, 0x00ffff01ff00ff01,
|
3932
|
-
0x00ffff0100ff0100, 0x00ffff010000ff00, 0x00ffff01000100ff, 0x00ffff0101ff00ff,
|
3933
|
-
0x00ffff010101ff00, 0x00ff00ffffffffff, 0x00ff00ffffff01ff, 0x00ff00ffff000101,
|
3934
|
-
0x00ff00ff00000000, 0x00ff00ff000101ff, 0x00ff00ff01010101, 0x00ff0000ff000000,
|
3935
|
-
0x00ff0000ff01ffff, 0x00ff000000ff0000, 0x00ff00000000ff00, 0x00ff0000000000ff,
|
3936
|
-
0x00ff000000000000, 0x00ff000000000001, 0x00ff000000000100, 0x00ff000000010000,
|
3937
|
-
0x00ff000001ffff01, 0x00ff000001000000, 0x00ff0001ff000101, 0x00ff000100ffffff,
|
3938
|
-
0x00ff000100000000, 0x00ff0001010001ff, 0x00ff01ffff000000, 0x00ff01ff0001ff00,
|
3939
|
-
0x00ff01ff01ff0100, 0x00ff0100ff01ff01, 0x00ff010000ff00ff, 0x00ff010000ff0101,
|
3940
|
-
0x00ff010000000000, 0x00ff010000010101, 0x00ff01000100ff00, 0x00ff010001010000,
|
3941
|
-
0x00ff0101ffffff00, 0x00ff01010000ff01, 0x00ff010100000100, 0x00ff010101ff0000,
|
3942
|
-
0x0000ffffffff0100, 0x0000ffffff00ff00, 0x0000ffffff0000ff, 0x0000ffffff010000,
|
3943
|
-
0x0000ffff00000000, 0x0000ffff00010101, 0x0000ffff01ffff01, 0x0000ffff01000100,
|
3944
|
-
0x0000ff00ff000000, 0x0000ff00ff01ff00, 0x0000ff00ff0101ff, 0x0000ff0000ff0000,
|
3945
|
-
0x0000ff000000ff00, 0x0000ff00000000ff, 0x0000ff0000000000, 0x0000ff0000000001,
|
3946
|
-
0x0000ff0000000100, 0x0000ff0000010000, 0x0000ff0001ffffff, 0x0000ff0001ff01ff,
|
3947
|
-
0x0000ff0001000000, 0x0000ff000101ffff, 0x0000ff01ffff0101, 0x0000ff01ff010000,
|
3948
|
-
0x0000ff0100000000, 0x0000ff0101000101, 0x000000ffffff0001, 0x000000ffff000000,
|
3949
|
-
0x000000ff00ff0000, 0x000000ff0000ff00, 0x000000ff000000ff, 0x000000ff00000000,
|
3950
|
-
0x000000ff00000001, 0x000000ff00000100, 0x000000ff00010000, 0x000000ff01000000,
|
3951
|
-
0x000000ff0101ff00, 0x00000000ffff0000, 0x00000000ff00ff00, 0x00000000ff0000ff,
|
3952
|
-
0x00000000ff000000, 0x00000000ff000001, 0x00000000ff000100, 0x00000000ff010000,
|
3953
|
-
0x0000000000ffff00, 0x0000000000ff00ff, 0x0000000000ff0000, 0x0000000000ff0001,
|
3954
|
-
0x0000000000ff0100, 0x000000000000ffff, 0x000000000000ff00, 0x000000000000ff01,
|
3955
|
-
0x00000000000000ff, 0x0000000000000001, 0x00000000000001ff, 0x0000000000000100,
|
3956
|
-
0x0000000000000101, 0x000000000001ff00, 0x00000000000100ff, 0x0000000000010000,
|
3957
|
-
0x0000000000010001, 0x0000000000010100, 0x0000000001ff0000, 0x000000000100ff00,
|
3958
|
-
0x00000000010000ff, 0x0000000001000000, 0x0000000001000001, 0x0000000001000100,
|
3959
|
-
0x0000000001010000, 0x00000001ffff01ff, 0x00000001ff000000, 0x0000000100ff0000,
|
3960
|
-
0x000000010000ff00, 0x00000001000000ff, 0x0000000100000000, 0x0000000100000001,
|
3961
|
-
0x0000000100000100, 0x0000000100010000, 0x0000000101000000, 0x000001ffff00ff00,
|
3962
|
-
0x000001ffff010001, 0x000001ffff0101ff, 0x000001ff00ffff01, 0x000001ff0000ffff,
|
3963
|
-
0x000001ff00000000, 0x000001ff010000ff, 0x000001ff01010100, 0x00000100ffff0100,
|
3964
|
-
0x00000100ff000000, 0x0000010000ff0000, 0x000001000000ff00, 0x00000100000000ff,
|
3965
|
-
0x0000010000000000, 0x0000010000000001, 0x0000010000000100, 0x0000010000010000,
|
3966
|
-
0x0000010001000000, 0x000001000101ff01, 0x00000101ffff0001, 0x00000101ff01ffff,
|
3967
|
-
0x0000010100000000, 0x0000010101010100, 0x0001ffffff000000, 0x0001ffff00ffffff,
|
3968
|
-
0x0001ffff00000100, 0x0001ffff0001ff00, 0x0001ffff01000000, 0x0001ff00ffffff00,
|
3969
|
-
0x0001ff00ffff01ff, 0x0001ff00ff010000, 0x0001ff0000000000, 0x0001ff0000010001,
|
3970
|
-
0x0001ff0001ff0000, 0x0001ff0001010100, 0x0001ff01ff0000ff, 0x0001ff01ff000001,
|
3971
|
-
0x0001ff0100ffffff, 0x0001ff010001ffff, 0x0001ff01000101ff, 0x0001ff010100ff01,
|
3972
|
-
0x000100ffff00ffff, 0x000100ffff00ff01, 0x000100ffff000100, 0x000100ff00000000,
|
3973
|
-
0x000100ff000101ff, 0x000100ff01ff0101, 0x000100ff0100ffff, 0x000100ff01010101,
|
3974
|
-
0x00010000ff000000, 0x00010000ff010100, 0x0001000000ff0000, 0x000100000000ff00,
|
3975
|
-
0x00010000000000ff, 0x0001000000000000, 0x0001000000000001, 0x0001000000000100,
|
3976
|
-
0x0001000000010000, 0x0001000001ffff01, 0x0001000001000000, 0x0001000100ff0101,
|
3977
|
-
0x0001000100000000, 0x00010001010100ff, 0x000101ffffff01ff, 0x000101ffffff0101,
|
3978
|
-
0x000101ff00010000, 0x000101ff01ff0000, 0x000101ff0100ff01, 0x00010100ffff0000,
|
3979
|
-
0x0001010000000000, 0x000101000001ffff, 0x0001010000010101, 0x00010100010001ff,
|
3980
|
-
0x00010101ff00ff00, 0x00010101ff010001, 0x0001010100ffffff, 0x0001010100ff01ff,
|
3981
|
-
0x00010101000101ff, 0x0001010101ff0000, 0x000101010100ff01, 0x0001010101000101,
|
3982
|
-
0x01ffffffffff0101, 0x01ffffffff01ffff, 0x01ffffffff01ff01, 0x01ffffffff0101ff,
|
3983
|
-
0x01ffffffff010101, 0x01ffffff00000000, 0x01ffffff01ff01ff, 0x01ffffff01000101,
|
3984
|
-
0x01ffffff0101ff01, 0x01ffffff010100ff, 0x01ffff000000ff00, 0x01ffff0000000001,
|
3985
|
-
0x01ffff00000001ff, 0x01ffff0000010000, 0x01ffff0001ff0000, 0x01ffff01ffffffff,
|
3986
|
-
0x01ffff01ffff01ff, 0x01ffff01ff000000, 0x01ffff01ff01ffff, 0x01ffff01ff0101ff,
|
3987
|
-
0x01ffff010100ffff, 0x01ff00ffffff0000, 0x01ff00ffff010000, 0x01ff00ff00ffff01,
|
3988
|
-
0x01ff0000ff0000ff, 0x01ff000000000000, 0x01ff00000001ff01, 0x01ff000001ffffff,
|
3989
|
-
0x01ff000001010100, 0x01ff0001ffffff01, 0x01ff0001ff010001, 0x01ff000101ff0100,
|
3990
|
-
0x01ff000101000001, 0x01ff0001010100ff, 0x01ff01ffff00ffff, 0x01ff01ff00010001,
|
3991
|
-
0x01ff01ff01000000, 0x01ff01ff010101ff, 0x01ff0100ff000001, 0x01ff010000ffff00,
|
3992
|
-
0x01ff010000000100, 0x01ff010001ff01ff, 0x01ff01000101ffff, 0x01ff0101ffff00ff,
|
3993
|
-
0x01ff0101ffff0101, 0x01ff0101ff0101ff, 0x01ff010100010000, 0x0100ffff00ff00ff,
|
3994
|
-
0x0100ffff00ff0001, 0x0100ffff00000100, 0x0100ffff0100ff00, 0x0100ff00ffff0000,
|
3995
|
-
0x0100ff00ff00ffff, 0x0100ff00ff00ff01, 0x0100ff00ff000100, 0x0100ff00ff010000,
|
3996
|
-
0x0100ff0000000000, 0x0100ff00000100ff, 0x0100ff0001ff0101, 0x0100ff0001010101,
|
3997
|
-
0x0100ff0100ff00ff, 0x0100ff0100ff0001, 0x0100ff0100000100, 0x0100ff0100010001,
|
3998
|
-
0x0100ff0101000000, 0x010000ffff00ff00, 0x010000ff0000ffff, 0x010000ff00000000,
|
3999
|
-
0x010000ff010001ff, 0x010000ff01010001, 0x01000000ffffff00, 0x01000000ffff0101,
|
4000
|
-
0x01000000ff000000, 0x01000000ff0100ff, 0x01000000ff010101, 0x0100000000ff0000,
|
4001
|
-
0x010000000000ff00, 0x01000000000000ff, 0x0100000000000000, 0x0100000000000001,
|
4002
|
-
0x0100000000000100, 0x0100000000010000, 0x0100000001000000, 0x0100000100000000,
|
4003
|
-
0x01000001000101ff, 0x0100000101ffff01, 0x010001ffff000101, 0x010001ff00ff0100,
|
4004
|
-
0x010001ff0000ff00, 0x010001ff000100ff, 0x010001ff01ffffff, 0x01000100ffff0000,
|
4005
|
-
0x01000100ff0001ff, 0x0100010000000000, 0x010001000001ff00, 0x0100010001ff0000,
|
4006
|
-
0x01000100010000ff, 0x0100010001000101, 0x01000101ff00ff01, 0x0100010100ff0100,
|
4007
|
-
0x010001010000ffff, 0x0100010101010001, 0x0101ffffffff0101, 0x0101ffffff0001ff,
|
4008
|
-
0x0101ffffff01ffff, 0x0101ffffff010101, 0x0101ffff00000000, 0x0101ffff0101ffff,
|
4009
|
-
0x0101ffff010101ff, 0x0101ff00ff000000, 0x0101ff0000ff0100, 0x0101ff000000ff00,
|
4010
|
-
0x0101ff0000010000, 0x0101ff00010000ff, 0x0101ff0001000001, 0x0101ff01ff010101,
|
4011
|
-
0x0101ff0100000000, 0x0101ff010101ff00, 0x010100ffffff0000, 0x010100ffff010000,
|
4012
|
-
0x010100ff00ff01ff, 0x010100ff000000ff, 0x010100ff00000101, 0x010100ff01ffff00,
|
4013
|
-
0x01010000ffffff01, 0x01010000ff000100, 0x01010000ff01ff01, 0x0101000000000000,
|
4014
|
-
0x01010000000100ff, 0x010100000101ff01, 0x01010001ffff0000, 0x01010001ff00ffff,
|
4015
|
-
0x01010001ff010000, 0x0101000101ffffff, 0x0101000101ff01ff, 0x0101000101010101,
|
4016
|
-
0x010101ffff01ffff, 0x010101ff00000000, 0x010101ff0001ff01, 0x010101ff0101ffff,
|
4017
|
-
0x010101ff010101ff, 0x01010100ffffffff, 0x01010100ff000001, 0x010101000000ff00,
|
4018
|
-
0x0101010001010000, 0x0101010100ff0001, 0x010101010001ff01, 0x010101010101ffff,
|
4019
|
-
|
4020
|
-
};
|
4021
|
-
|
4022
|
-
static const uint8_t ksigns_iq2xs[128] = {
|
4023
|
-
0, 129, 130, 3, 132, 5, 6, 135, 136, 9, 10, 139, 12, 141, 142, 15,
|
4024
|
-
144, 17, 18, 147, 20, 149, 150, 23, 24, 153, 154, 27, 156, 29, 30, 159,
|
4025
|
-
160, 33, 34, 163, 36, 165, 166, 39, 40, 169, 170, 43, 172, 45, 46, 175,
|
4026
|
-
48, 177, 178, 51, 180, 53, 54, 183, 184, 57, 58, 187, 60, 189, 190, 63,
|
4027
|
-
192, 65, 66, 195, 68, 197, 198, 71, 72, 201, 202, 75, 204, 77, 78, 207,
|
4028
|
-
80, 209, 210, 83, 212, 85, 86, 215, 216, 89, 90, 219, 92, 221, 222, 95,
|
4029
|
-
96, 225, 226, 99, 228, 101, 102, 231, 232, 105, 106, 235, 108, 237, 238, 111,
|
4030
|
-
240, 113, 114, 243, 116, 245, 246, 119, 120, 249, 250, 123, 252, 125, 126, 255,
|
4031
|
-
};
|
3281
|
+
size_t quantize_q8_0(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
3282
|
+
(void)quant_weights; // not used
|
3283
|
+
const size_t row_size = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
|
3284
|
+
quantize_row_q8_0_reference(src, dst, nrow*n_per_row);
|
3285
|
+
return nrow * row_size;
|
3286
|
+
}
|
4032
3287
|
|
4033
|
-
|
3288
|
+
// ====================== "True" 2-bit (de)-quantization
|
4034
3289
|
|
4035
3290
|
void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y, int k) {
|
4036
3291
|
assert(k % QK_K == 0);
|
@@ -4198,39 +3453,23 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
|
|
4198
3453
|
assert(k % QK_K == 0);
|
4199
3454
|
const int nb = k / QK_K;
|
4200
3455
|
|
4201
|
-
float db[4];
|
4202
|
-
uint16_t idx[4];
|
4203
|
-
//const int8_t * grid[4];
|
4204
|
-
|
4205
3456
|
for (int i = 0; i < nb; i++) {
|
4206
3457
|
|
4207
3458
|
const float d = GGML_FP16_TO_FP32(x[i].d);
|
4208
|
-
const uint8_t
|
4209
|
-
const
|
3459
|
+
const uint8_t * qs = x[i].qs;
|
3460
|
+
const uint16_t * qh = x[i].qh;
|
4210
3461
|
|
4211
|
-
for (int
|
4212
|
-
|
4213
|
-
|
4214
|
-
idx[2] = qs[2] | ((sc[1] & 0x08) << 5);
|
4215
|
-
idx[3] = qs[3] | ((sc[1] & 0x80) << 1);
|
4216
|
-
//grid[0] = (const int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5)));
|
4217
|
-
//grid[1] = (const int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1)));
|
4218
|
-
//grid[2] = (const int8_t *)(iq1s_grid + (qs[2] | ((sc[1] & 0x08) << 5)));
|
4219
|
-
//grid[3] = (const int8_t *)(iq1s_grid + (qs[3] | ((sc[1] & 0x80) << 1)));
|
4220
|
-
db[0] = d * (2*(sc[0] & 7) + 1);
|
4221
|
-
db[1] = d * (2*((sc[0] >> 4) & 7) + 1);
|
4222
|
-
db[2] = d * (2*(sc[1] & 7) + 1);
|
4223
|
-
db[3] = d * (2*((sc[1] >> 4) & 7) + 1);
|
3462
|
+
for (int ib = 0; ib < QK_K/32; ++ib) {
|
3463
|
+
const float dl = d * (2*((qh[ib] >> 12) & 7) + 1);
|
3464
|
+
const float delta = qh[ib] & 0x8000 ? -IQ1S_DELTA : IQ1S_DELTA;
|
4224
3465
|
for (int l = 0; l < 4; ++l) {
|
4225
|
-
const int8_t * grid = (const int8_t *)(iq1s_grid +
|
3466
|
+
const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8)));
|
4226
3467
|
for (int j = 0; j < 8; ++j) {
|
4227
|
-
|
4228
|
-
y[j] = db[l] * grid[j];
|
3468
|
+
y[j] = dl * (grid[j] + delta);
|
4229
3469
|
}
|
4230
3470
|
y += 8;
|
4231
3471
|
}
|
4232
3472
|
qs += 4;
|
4233
|
-
sc += 2;
|
4234
3473
|
}
|
4235
3474
|
}
|
4236
3475
|
}
|
@@ -4784,10 +4023,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4784
4023
|
const block_q8_1 * restrict b_y0 = &vy0[i];
|
4785
4024
|
const block_q8_1 * restrict b_y1 = &vy1[i];
|
4786
4025
|
|
4787
|
-
float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * b_y0->s,
|
4788
|
-
GGML_FP16_TO_FP32(b_x1->m) * b_y0->s,
|
4789
|
-
GGML_FP16_TO_FP32(b_x0->m) * b_y1->s,
|
4790
|
-
GGML_FP16_TO_FP32(b_x1->m) * b_y1->s};
|
4026
|
+
float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s),
|
4027
|
+
GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->s),
|
4028
|
+
GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->s),
|
4029
|
+
GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->s)};
|
4791
4030
|
summs0 += summs_t;
|
4792
4031
|
|
4793
4032
|
const uint8x16_t m4b = vdupq_n_u8(0x0F);
|
@@ -4808,10 +4047,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4808
4047
|
const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16);
|
4809
4048
|
|
4810
4049
|
// mmla into int32x4_t
|
4811
|
-
float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*
|
4812
|
-
GGML_FP16_TO_FP32(b_x0->d)*
|
4813
|
-
GGML_FP16_TO_FP32(b_x1->d)*
|
4814
|
-
GGML_FP16_TO_FP32(b_x1->d)*
|
4050
|
+
float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*b_y0->d,
|
4051
|
+
GGML_FP16_TO_FP32(b_x0->d)*b_y1->d,
|
4052
|
+
GGML_FP16_TO_FP32(b_x1->d)*b_y0->d,
|
4053
|
+
GGML_FP16_TO_FP32(b_x1->d)*b_y1->d};
|
4815
4054
|
|
4816
4055
|
int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
|
4817
4056
|
int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
|
@@ -4852,7 +4091,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4852
4091
|
const block_q8_1 * restrict y0 = &y[i + 0];
|
4853
4092
|
const block_q8_1 * restrict y1 = &y[i + 1];
|
4854
4093
|
|
4855
|
-
summs += GGML_FP16_TO_FP32(x0->m) * y0->s + GGML_FP16_TO_FP32(x1->m) * y1->s;
|
4094
|
+
summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);
|
4856
4095
|
|
4857
4096
|
const uint8x16_t m4b = vdupq_n_u8(0x0F);
|
4858
4097
|
|
@@ -4875,8 +4114,8 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4875
4114
|
const int32x4_t p_0 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h);
|
4876
4115
|
const int32x4_t p_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h);
|
4877
4116
|
|
4878
|
-
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*y0->d);
|
4879
|
-
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*y1->d);
|
4117
|
+
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
|
4118
|
+
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
|
4880
4119
|
}
|
4881
4120
|
|
4882
4121
|
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs;
|
@@ -4889,9 +4128,9 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4889
4128
|
// Main loop
|
4890
4129
|
for (int i = 0; i < nb; ++i) {
|
4891
4130
|
const float d0 = GGML_FP16_TO_FP32(x[i].d);
|
4892
|
-
const float d1 = y[i].d;
|
4131
|
+
const float d1 = GGML_FP16_TO_FP32(y[i].d);
|
4893
4132
|
|
4894
|
-
summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
|
4133
|
+
summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
|
4895
4134
|
|
4896
4135
|
const __m256 d0v = _mm256_set1_ps( d0 );
|
4897
4136
|
const __m256 d1v = _mm256_set1_ps( d1 );
|
@@ -4943,7 +4182,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4943
4182
|
|
4944
4183
|
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
|
4945
4184
|
|
4946
|
-
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
4185
|
+
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
4947
4186
|
}
|
4948
4187
|
|
4949
4188
|
*s = sumf;
|
@@ -4961,7 +4200,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
4961
4200
|
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
|
4962
4201
|
}
|
4963
4202
|
|
4964
|
-
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
4203
|
+
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
4965
4204
|
}
|
4966
4205
|
|
4967
4206
|
*s = sumf;
|
@@ -5297,8 +4536,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5297
4536
|
|
5298
4537
|
const uint8x16_t m4b = vdupq_n_u8(0x0F);
|
5299
4538
|
|
5300
|
-
summs0 += GGML_FP16_TO_FP32(x0->m) * y0->s;
|
5301
|
-
summs1 += GGML_FP16_TO_FP32(x1->m) * y1->s;
|
4539
|
+
summs0 += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);
|
4540
|
+
summs1 += GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);
|
5302
4541
|
|
5303
4542
|
// extract the 5th bit via lookup table ((b) << 4)
|
5304
4543
|
memcpy(&qh0, x0->qh, sizeof(qh0));
|
@@ -5342,10 +4581,10 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5342
4581
|
|
5343
4582
|
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(
|
5344
4583
|
ggml_vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l),
|
5345
|
-
ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*y0->d);
|
4584
|
+
ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
|
5346
4585
|
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(
|
5347
4586
|
ggml_vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l),
|
5348
|
-
ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*y1->d);
|
4587
|
+
ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
|
5349
4588
|
}
|
5350
4589
|
|
5351
4590
|
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1;
|
@@ -5362,7 +4601,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5362
4601
|
const block_q5_1 * restrict x0 = &x[i];
|
5363
4602
|
const block_q8_1 * restrict y0 = &y[i];
|
5364
4603
|
|
5365
|
-
summs += GGML_FP16_TO_FP32(x0->m) * y0->s;
|
4604
|
+
summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);
|
5366
4605
|
|
5367
4606
|
const v128_t m4b = wasm_i8x16_splat(0x0F);
|
5368
4607
|
|
@@ -5409,7 +4648,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5409
4648
|
wasm_i32x4_dot_i16x8(v0lfh, v1lh)),
|
5410
4649
|
wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl),
|
5411
4650
|
wasm_i32x4_dot_i16x8(v0hfh, v1hh)))),
|
5412
|
-
wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * y0->d)));
|
4651
|
+
wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d))));
|
5413
4652
|
}
|
5414
4653
|
|
5415
4654
|
*s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
|
@@ -5424,14 +4663,14 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5424
4663
|
for (int i = 0; i < nb; i++) {
|
5425
4664
|
const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
|
5426
4665
|
|
5427
|
-
summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
|
4666
|
+
summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
|
5428
4667
|
|
5429
4668
|
__m256i qx = bytes_from_nibbles_32(x[i].qs);
|
5430
4669
|
__m256i bxhi = bytes_from_bits_32(x[i].qh);
|
5431
4670
|
bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
|
5432
4671
|
qx = _mm256_or_si256(qx, bxhi);
|
5433
4672
|
|
5434
|
-
const __m256 dy = _mm256_set1_ps(y[i].d);
|
4673
|
+
const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
|
5435
4674
|
const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
|
5436
4675
|
|
5437
4676
|
const __m256 q = mul_sum_us8_pairs_float(qx, qy);
|
@@ -5451,7 +4690,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5451
4690
|
for (int i = 0; i < nb; i++) {
|
5452
4691
|
const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
|
5453
4692
|
|
5454
|
-
summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
|
4693
|
+
summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
|
5455
4694
|
|
5456
4695
|
__m256i bx_0 = bytes_from_nibbles_32(x[i].qs);
|
5457
4696
|
const __m256i bxhi = bytes_from_bits_32(x[i].qh);
|
@@ -5465,7 +4704,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5465
4704
|
bxh = _mm_or_si128(bxh, bxhih);
|
5466
4705
|
bx_0 = MM256_SET_M128I(bxh, bxl);
|
5467
4706
|
|
5468
|
-
const __m256 dy = _mm256_set1_ps(y[i].d);
|
4707
|
+
const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
|
5469
4708
|
const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs);
|
5470
4709
|
|
5471
4710
|
const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0);
|
@@ -5532,7 +4771,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5532
4771
|
|
5533
4772
|
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
|
5534
4773
|
|
5535
|
-
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
4774
|
+
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
5536
4775
|
}
|
5537
4776
|
|
5538
4777
|
*s = sumf;
|
@@ -5556,7 +4795,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|
5556
4795
|
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
|
5557
4796
|
}
|
5558
4797
|
|
5559
|
-
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
4798
|
+
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
5560
4799
|
}
|
5561
4800
|
|
5562
4801
|
*s = sumf;
|
@@ -9758,8 +8997,8 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
9758
8997
|
|
9759
8998
|
static const uint8_t k_mask2[16] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,};
|
9760
8999
|
|
9761
|
-
const
|
9762
|
-
const uint8x16_t
|
9000
|
+
const ggml_uint8x16x2_t mask1 = ggml_vld1q_u8_x2(k_mask1);
|
9001
|
+
const uint8x16_t mask2 = vld1q_u8(k_mask2);
|
9763
9002
|
const uint8x16_t m1 = vdupq_n_u8(1);
|
9764
9003
|
const int32x4_t vzero = vdupq_n_s32(0);
|
9765
9004
|
|
@@ -9790,7 +9029,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
9790
9029
|
vld1_s8((const int8_t *)(iq2s_grid + (qs[7] | ((qh[ib32+1] << 2) & 0x300)))));
|
9791
9030
|
qs += 8;
|
9792
9031
|
|
9793
|
-
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
9032
|
+
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | ((uint32_t) signs[1] << 16)));
|
9794
9033
|
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
9795
9034
|
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
9796
9035
|
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
@@ -9799,7 +9038,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
9799
9038
|
q2s.val[0] = vmulq_s8(vreinterpretq_s8_u8(vorrq_u8(vs.val[0], m1)), q2s.val[0]);
|
9800
9039
|
q2s.val[1] = vmulq_s8(vreinterpretq_s8_u8(vorrq_u8(vs.val[1], m1)), q2s.val[1]);
|
9801
9040
|
|
9802
|
-
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16)));
|
9041
|
+
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | ((uint32_t) signs[3] << 16)));
|
9803
9042
|
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
9804
9043
|
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
9805
9044
|
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
@@ -9870,12 +9109,12 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
|
9870
9109
|
iq2s_grid[qs[4] | ((qh[ib32+1] << 8) & 0x300)]);
|
9871
9110
|
qs += 8;
|
9872
9111
|
|
9873
|
-
__m256i aux256 = _mm256_set1_epi32(signs[0] | (signs[1] << 16));
|
9112
|
+
__m256i aux256 = _mm256_set1_epi32(signs[0] | ((uint32_t) signs[1] << 16));
|
9874
9113
|
aux256 = _mm256_and_si256(_mm256_shuffle_epi8(aux256,mask1), mask2);
|
9875
9114
|
const __m256i s2_1 = _mm256_cmpeq_epi8(aux256, mask2);
|
9876
9115
|
const __m256i q8s_1 = _mm256_sub_epi8(_mm256_xor_si256(s2_1, q8_1), s2_1);
|
9877
9116
|
|
9878
|
-
aux256 = _mm256_set1_epi32(signs[2] | (signs[3] << 16));
|
9117
|
+
aux256 = _mm256_set1_epi32(signs[2] | ((uint32_t) signs[3] << 16));
|
9879
9118
|
aux256 = _mm256_and_si256(_mm256_shuffle_epi8(aux256,mask1), mask2);
|
9880
9119
|
const __m256i s2_2 = _mm256_cmpeq_epi8(aux256, mask2);
|
9881
9120
|
const __m256i q8s_2 = _mm256_sub_epi8(_mm256_xor_si256(s2_2, q8_2), s2_2);
|
@@ -10075,7 +9314,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * restrict s, size_t bs, const void
|
|
10075
9314
|
#endif
|
10076
9315
|
}
|
10077
9316
|
|
10078
|
-
void ggml_vec_dot_iq3_s_q8_K (int n, float *
|
9317
|
+
void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
|
10079
9318
|
assert(n % QK_K == 0);
|
10080
9319
|
assert(nrc == 1);
|
10081
9320
|
UNUSED(nrc);
|
@@ -10103,11 +9342,12 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const v
|
|
10103
9342
|
|
10104
9343
|
static const int16_t k_shift[8] = {8, 7, 6, 5, 4, 3, 2, 1};
|
10105
9344
|
|
10106
|
-
const
|
10107
|
-
const uint8x16_t
|
10108
|
-
|
10109
|
-
const
|
10110
|
-
const
|
9345
|
+
const ggml_uint8x16x2_t mask1 = ggml_vld1q_u8_x2(k_mask1);
|
9346
|
+
const uint8x16_t mask2 = vld1q_u8(k_mask2);
|
9347
|
+
|
9348
|
+
const int16x8_t hshift = vld1q_s16(k_shift);
|
9349
|
+
const uint16x8_t m256 = vdupq_n_u16(256);
|
9350
|
+
const uint8x16_t m1 = vdupq_n_u8(1);
|
10111
9351
|
|
10112
9352
|
uint8x16x2_t vs;
|
10113
9353
|
ggml_int8x16x4_t q3s;
|
@@ -10139,18 +9379,18 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const v
|
|
10139
9379
|
|
10140
9380
|
const uint8x16_t idx_l = vld1q_u8(qs); qs += 16;
|
10141
9381
|
idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256));
|
10142
|
-
const uint32x4_t aux32x4_0 =
|
10143
|
-
|
10144
|
-
const uint32x4_t aux32x4_1 =
|
10145
|
-
|
9382
|
+
const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
9383
|
+
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
|
9384
|
+
const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
9385
|
+
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
|
10146
9386
|
idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256));
|
10147
|
-
const uint32x4_t aux32x4_2 =
|
10148
|
-
|
10149
|
-
const uint32x4_t aux32x4_3 =
|
10150
|
-
|
9387
|
+
const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
9388
|
+
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
|
9389
|
+
const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
9390
|
+
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
|
10151
9391
|
|
10152
9392
|
|
10153
|
-
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
9393
|
+
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | ((uint32_t) signs[1] << 16)));
|
10154
9394
|
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
10155
9395
|
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
10156
9396
|
vs.val[0] = vorrq_u8(vceqq_u8(vs.val[0], mask2), m1);
|
@@ -10159,7 +9399,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const v
|
|
10159
9399
|
q3s.val[0] = vmulq_s8(vreinterpretq_s8_u8(vs.val[0]), vreinterpretq_s8_u32(aux32x4_0));
|
10160
9400
|
q3s.val[1] = vmulq_s8(vreinterpretq_s8_u8(vs.val[1]), vreinterpretq_s8_u32(aux32x4_1));
|
10161
9401
|
|
10162
|
-
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16)));
|
9402
|
+
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | ((uint32_t) signs[3] << 16)));
|
10163
9403
|
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
10164
9404
|
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
10165
9405
|
vs.val[0] = vorrq_u8(vceqq_u8(vs.val[0], mask2), m1);
|
@@ -10322,7 +9562,7 @@ static inline __m256i mul_add_epi8(const __m256i x, const __m256i y) {
|
|
10322
9562
|
}
|
10323
9563
|
#endif
|
10324
9564
|
|
10325
|
-
void ggml_vec_dot_iq1_s_q8_K (int n, float *
|
9565
|
+
void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
|
10326
9566
|
assert(n % QK_K == 0);
|
10327
9567
|
assert(nrc == 1);
|
10328
9568
|
UNUSED(nrc);
|
@@ -10335,155 +9575,119 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
|
|
10335
9575
|
|
10336
9576
|
const int nb = n / QK_K;
|
10337
9577
|
|
10338
|
-
|
10339
|
-
#if defined __ARM_NEON && QK_K == 256
|
10340
|
-
|
10341
|
-
const uint8x16_t m8 = vdupq_n_u8(0x08);
|
10342
|
-
const uint8x16_t m7 = vdupq_n_u8(0x07);
|
10343
|
-
const uint8x16_t m1 = vdupq_n_u8(0x01);
|
10344
|
-
const int32x4_t vzero = vdupq_n_s32(0);
|
9578
|
+
#if defined __ARM_NEON
|
10345
9579
|
|
10346
|
-
|
10347
|
-
uint16x8x2_t vindex;
|
10348
|
-
int8x16x4_t q1b;
|
9580
|
+
ggml_int8x16x4_t q1b;
|
10349
9581
|
ggml_int8x16x4_t q8b;
|
10350
|
-
uint16x8x4_t scales;
|
10351
|
-
int32x4x2_t sumi;
|
10352
|
-
int32x4x2_t dotq;
|
10353
9582
|
|
10354
9583
|
float sumf = 0;
|
10355
9584
|
for (int i = 0; i < nb; ++i) {
|
10356
9585
|
|
10357
|
-
const int8_t
|
10358
|
-
const uint8_t
|
10359
|
-
const
|
9586
|
+
const int8_t * q8 = y[i].qs;
|
9587
|
+
const uint8_t * qs = x[i].qs;
|
9588
|
+
const uint16_t * qh = x[i].qh;
|
9589
|
+
|
9590
|
+
int sumi1 = 0, sumi2 = 0, sumi3 = 0;
|
10360
9591
|
|
10361
|
-
|
9592
|
+
for (int ib = 0; ib < QK_K/32; ib += 2) {
|
10362
9593
|
|
10363
|
-
|
10364
|
-
|
10365
|
-
const
|
10366
|
-
|
10367
|
-
const
|
10368
|
-
|
10369
|
-
|
10370
|
-
|
10371
|
-
|
10372
|
-
scales.val[0] = vmovl_u8(vget_low_u8 (scales8));
|
10373
|
-
scales.val[1] = vmovl_u8(vget_high_u8 (scales8));
|
9594
|
+
q1b.val[0] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[0] | ((qh[ib+0] << 8) & 0x700)))),
|
9595
|
+
vld1_s8((const int8_t *)(iq1s_grid + (qs[1] | ((qh[ib+0] << 5) & 0x700)))));
|
9596
|
+
q1b.val[1] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[2] | ((qh[ib+0] << 2) & 0x700)))),
|
9597
|
+
vld1_s8((const int8_t *)(iq1s_grid + (qs[3] | ((qh[ib+0] >> 1) & 0x700)))));
|
9598
|
+
q1b.val[2] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[4] | ((qh[ib+1] << 8) & 0x700)))),
|
9599
|
+
vld1_s8((const int8_t *)(iq1s_grid + (qs[5] | ((qh[ib+1] << 5) & 0x700)))));
|
9600
|
+
q1b.val[3] = vcombine_s8(vld1_s8((const int8_t *)(iq1s_grid + (qs[6] | ((qh[ib+1] << 2) & 0x700)))),
|
9601
|
+
vld1_s8((const int8_t *)(iq1s_grid + (qs[7] | ((qh[ib+1] >> 1) & 0x700)))));
|
9602
|
+
qs += 8;
|
10374
9603
|
|
10375
|
-
|
10376
|
-
vst1q_u16(gindex+0, vindex.val[l]);
|
10377
|
-
q1b.val[0] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[0])), vld1_s8((const void *)(iq1s_grid+gindex[1])));
|
10378
|
-
q1b.val[1] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[2])), vld1_s8((const void *)(iq1s_grid+gindex[3])));
|
10379
|
-
q1b.val[2] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[4])), vld1_s8((const void *)(iq1s_grid+gindex[5])));
|
10380
|
-
q1b.val[3] = vcombine_s8(vld1_s8((const void *)(iq1s_grid+gindex[6])), vld1_s8((const void *)(iq1s_grid+gindex[7])));
|
10381
|
-
q8b = ggml_vld1q_s8_x4(q8); q8 += 64;
|
9604
|
+
q8b = ggml_vld1q_s8_x4(q8); q8 += 64;
|
10382
9605
|
|
10383
|
-
|
10384
|
-
|
9606
|
+
const int32x4_t p1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[0], q8b.val[0]), q1b.val[1], q8b.val[1]);
|
9607
|
+
const int32x4_t p2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[2], q8b.val[2]), q1b.val[3], q8b.val[3]);
|
9608
|
+
|
9609
|
+
const int ls1 = 2*((qh[ib+0] >> 12) & 7) + 1;
|
9610
|
+
const int ls2 = 2*((qh[ib+1] >> 12) & 7) + 1;
|
9611
|
+
sumi1 += vaddvq_s32(p1) * ls1;
|
9612
|
+
sumi2 += vaddvq_s32(p2) * ls2;
|
9613
|
+
sumi3 += (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]) * ls1 * (qh[ib+0] & 0x8000 ? -1 : 1)
|
9614
|
+
+ (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * ls2 * (qh[ib+1] & 0x8000 ? -1 : 1);
|
10385
9615
|
|
10386
|
-
sumi.val[0] = vmlaq_s32(sumi.val[0], dotq.val[0], vreinterpretq_s32_u32(vmovl_u16(vget_low_u16 (scales.val[l]))));
|
10387
|
-
sumi.val[1] = vmlaq_s32(sumi.val[1], dotq.val[1], vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(scales.val[l]))));
|
10388
|
-
}
|
10389
9616
|
}
|
10390
9617
|
|
10391
|
-
sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) *
|
9618
|
+
sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2 + IQ1S_DELTA * sumi3);
|
10392
9619
|
}
|
10393
9620
|
|
10394
9621
|
*s = sumf;
|
10395
9622
|
|
10396
|
-
|
10397
|
-
#elif defined __AVX2__ && QK_K == 256
|
10398
|
-
|
10399
|
-
const __m128i m8 = _mm_set1_epi8(0x08);
|
10400
|
-
const __m128i m7 = _mm_set1_epi8(0x07);
|
10401
|
-
const __m128i m1 = _mm_set1_epi8(0x01);
|
10402
|
-
const __m128i shuffle_h = _mm_set_epi8(15, 7, 14, 6, 13, 5, 12, 4, 11, 3, 10, 2, 9, 1, 8, 0);
|
10403
|
-
const __m128i shuffle_s[4] = {
|
10404
|
-
_mm_set_epi32(0x03030303, 0x02020202, 0x01010101, 0x00000000),
|
10405
|
-
_mm_set_epi32(0x07070707, 0x06060606, 0x05050505, 0x04040404),
|
10406
|
-
_mm_set_epi32(0x0b0b0b0b, 0x0a0a0a0a, 0x09090909, 0x08080808),
|
10407
|
-
_mm_set_epi32(0x0f0f0f0f, 0x0e0e0e0e, 0x0d0d0d0d, 0x0c0c0c0c)
|
10408
|
-
};
|
10409
|
-
|
10410
|
-
uint64_t aux64;
|
10411
|
-
|
10412
|
-
typedef union m256i_uint16 {
|
10413
|
-
__m256i reg;
|
10414
|
-
uint16_t s[16];
|
10415
|
-
} m256i_uint16_t;
|
10416
|
-
|
10417
|
-
m256i_uint16_t v_gindex;
|
9623
|
+
#elif defined __AVX2__
|
10418
9624
|
|
10419
9625
|
__m256 accum = _mm256_setzero_ps();
|
9626
|
+
float accum1 = 0;
|
10420
9627
|
for (int i = 0; i < nb; ++i) {
|
10421
9628
|
|
10422
|
-
const int8_t
|
10423
|
-
const uint8_t
|
10424
|
-
const
|
9629
|
+
const int8_t * q8 = y[i].qs;
|
9630
|
+
const uint8_t * qs = x[i].qs;
|
9631
|
+
const uint16_t * qh = x[i].qh;
|
10425
9632
|
|
10426
9633
|
__m256i sumi = _mm256_setzero_si256();
|
10427
|
-
|
10428
|
-
|
10429
|
-
|
10430
|
-
|
10431
|
-
const __m256i
|
10432
|
-
|
10433
|
-
|
9634
|
+
int sumi1 = 0;
|
9635
|
+
for (int ib = 0; ib < QK_K/32; ib += 2) {
|
9636
|
+
const __m256i q1b_1 = _mm256_set_epi64x(iq1s_grid[qs[3] | ((qh[ib+0] >> 1) & 0x700)], iq1s_grid[qs[2] | ((qh[ib+0] << 2) & 0x700)],
|
9637
|
+
iq1s_grid[qs[1] | ((qh[ib+0] << 5) & 0x700)], iq1s_grid[qs[0] | ((qh[ib+0] << 8) & 0x700)]);
|
9638
|
+
const __m256i q1b_2 = _mm256_set_epi64x(iq1s_grid[qs[7] | ((qh[ib+1] >> 1) & 0x700)], iq1s_grid[qs[6] | ((qh[ib+1] << 2) & 0x700)],
|
9639
|
+
iq1s_grid[qs[5] | ((qh[ib+1] << 5) & 0x700)], iq1s_grid[qs[4] | ((qh[ib+1] << 8) & 0x700)]);
|
9640
|
+
qs += 8;
|
9641
|
+
const __m256i q8b_1 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
|
9642
|
+
const __m256i q8b_2 = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
|
10434
9643
|
|
10435
|
-
|
10436
|
-
|
10437
|
-
|
10438
|
-
|
10439
|
-
|
10440
|
-
|
10441
|
-
const __m256i p = _mm256_madd_epi16(s16, dot);
|
10442
|
-
sumi = _mm256_add_epi32(sumi, p);
|
10443
|
-
}
|
9644
|
+
const __m256i dot1 = mul_add_epi8(q1b_1, q8b_1);
|
9645
|
+
const __m256i dot2 = mul_add_epi8(q1b_2, q8b_2);
|
9646
|
+
const int16_t ls1 = 2*((qh[ib+0] >> 12) & 7) + 1;
|
9647
|
+
const int16_t ls2 = 2*((qh[ib+1] >> 12) & 7) + 1;
|
9648
|
+
const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(ls1));
|
9649
|
+
const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(ls2));
|
10444
9650
|
|
9651
|
+
sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p1, p2));
|
9652
|
+
sumi1 += (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]) * (qh[ib+0] & 0x8000 ? -1 : 1) * ls1
|
9653
|
+
+ (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * (qh[ib+1] & 0x8000 ? -1 : 1) * ls2;
|
10445
9654
|
}
|
10446
9655
|
|
10447
|
-
|
9656
|
+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
|
9657
|
+
accum = _mm256_fmadd_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(sumi), accum);
|
9658
|
+
accum1 += d * sumi1;
|
10448
9659
|
|
10449
9660
|
}
|
10450
9661
|
|
10451
|
-
*s = hsum_float_8(accum);
|
9662
|
+
*s = hsum_float_8(accum) + IQ1S_DELTA * accum1;
|
10452
9663
|
|
10453
9664
|
#else
|
10454
9665
|
|
10455
|
-
int db[4];
|
10456
|
-
uint16_t idx[4];
|
10457
|
-
|
10458
9666
|
float sumf = 0;
|
10459
|
-
for (int i = 0; i < nb; ++
|
9667
|
+
for (int i = 0; i < nb; i++) {
|
10460
9668
|
|
10461
|
-
const int8_t
|
10462
|
-
const uint8_t
|
10463
|
-
const
|
9669
|
+
const int8_t * q8 = y[i].qs;
|
9670
|
+
const uint8_t * qs = x[i].qs;
|
9671
|
+
const uint16_t * qh = x[i].qh;
|
10464
9672
|
|
10465
|
-
int sumi = 0;
|
10466
|
-
for (int
|
10467
|
-
|
10468
|
-
|
10469
|
-
|
10470
|
-
idx[3] = qs[3] | ((sc[1] & 0x80) << 1);
|
10471
|
-
db[0] = (2*(sc[0] & 7) + 1);
|
10472
|
-
db[1] = (2*((sc[0] >> 4) & 7) + 1);
|
10473
|
-
db[2] = (2*(sc[1] & 7) + 1);
|
10474
|
-
db[3] = (2*((sc[1] >> 4) & 7) + 1);
|
9673
|
+
int sumi = 0, sumi1 = 0;
|
9674
|
+
for (int ib = 0; ib < QK_K/32; ++ib) {
|
9675
|
+
const int ls = 2*((qh[ib] >> 12) & 7) + 1;
|
9676
|
+
const int delta = qh[ib] & 0x8000 ? -1 : 1;
|
9677
|
+
int lsum = 0;
|
10475
9678
|
for (int l = 0; l < 4; ++l) {
|
10476
|
-
const int8_t * grid = (const int8_t *)(iq1s_grid +
|
10477
|
-
int
|
10478
|
-
|
10479
|
-
|
9679
|
+
const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8)));
|
9680
|
+
for (int j = 0; j < 8; ++j) {
|
9681
|
+
lsum += q8[j] * grid[j];
|
9682
|
+
}
|
10480
9683
|
q8 += 8;
|
10481
9684
|
}
|
9685
|
+
sumi += ls * lsum;
|
9686
|
+
sumi1 += ls * delta * (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]);
|
10482
9687
|
qs += 4;
|
10483
|
-
sc += 2;
|
10484
9688
|
}
|
10485
9689
|
|
10486
|
-
sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * sumi;
|
9690
|
+
sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1);
|
10487
9691
|
}
|
10488
9692
|
|
10489
9693
|
*s = sumf;
|
@@ -10744,7 +9948,7 @@ static inline int iq2_grid_size(enum ggml_type type) {
|
|
10744
9948
|
GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ2_S);
|
10745
9949
|
return type == GGML_TYPE_IQ2_XXS ? 256 :
|
10746
9950
|
type == GGML_TYPE_IQ2_XS ? 512 :
|
10747
|
-
type == GGML_TYPE_IQ1_S ?
|
9951
|
+
type == GGML_TYPE_IQ1_S ? NGRID_IQ1S : 1024;
|
10748
9952
|
}
|
10749
9953
|
|
10750
9954
|
static int iq2_compare_func(const void * left, const void * right) {
|
@@ -10811,39 +10015,135 @@ void iq2xs_init_impl(enum ggml_type type) {
|
|
10811
10015
|
40962, 40968, 40970, 40992, 41002, 41120, 41297, 41305, 41382, 41472, 41474, 41480, 41514, 41600, 41632, 42048,
|
10812
10016
|
42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690,
|
10813
10017
|
};
|
10814
|
-
static const uint16_t
|
10815
|
-
|
10816
|
-
|
10817
|
-
|
10818
|
-
|
10819
|
-
|
10820
|
-
|
10821
|
-
|
10822
|
-
|
10823
|
-
|
10824
|
-
|
10825
|
-
|
10826
|
-
|
10827
|
-
|
10828
|
-
|
10829
|
-
|
10830
|
-
|
10831
|
-
|
10832
|
-
|
10833
|
-
|
10834
|
-
|
10835
|
-
|
10836
|
-
|
10837
|
-
|
10838
|
-
|
10839
|
-
|
10840
|
-
|
10841
|
-
|
10842
|
-
|
10843
|
-
|
10844
|
-
|
10845
|
-
|
10846
|
-
|
10018
|
+
static const uint16_t kgrid_1bit_2048[NGRID_IQ1S] = {
|
10019
|
+
0, 2, 5, 8, 10, 17, 21, 32, 34, 40, 42, 69, 81, 84, 86, 101,
|
10020
|
+
128, 130, 136, 138, 149, 160, 162, 168, 170, 260, 261, 273, 276, 278, 281, 282,
|
10021
|
+
293, 321, 326, 329, 338, 341, 346, 353, 356, 358, 360, 389, 401, 404, 406, 421,
|
10022
|
+
512, 514, 520, 522, 533, 544, 546, 552, 554, 581, 593, 601, 612, 617, 640, 642,
|
10023
|
+
648, 650, 657, 661, 665, 672, 674, 680, 682, 1041, 1044, 1046, 1061, 1089, 1097, 1109,
|
10024
|
+
1114, 1124, 1125, 1169, 1177, 1189, 1281, 1284, 1285, 1286, 1301, 1304, 1306, 1321, 1344, 1349,
|
10025
|
+
1354, 1360, 1361, 1364, 1365, 1366, 1369, 1376, 1378, 1381, 1384, 1386, 1409, 1425, 1429, 1432,
|
10026
|
+
1434, 1441, 1444, 1445, 1446, 1449, 1556, 1561, 1601, 1604, 1616, 1618, 1621, 1624, 1632, 1633,
|
10027
|
+
1638, 1641, 1669, 1681, 1684, 1689, 2048, 2050, 2056, 2058, 2069, 2080, 2082, 2088, 2090, 2117,
|
10028
|
+
2129, 2134, 2149, 2176, 2178, 2184, 2186, 2197, 2208, 2210, 2216, 2218, 2309, 2321, 2324, 2329,
|
10029
|
+
2340, 2341, 2369, 2384, 2385, 2389, 2401, 2404, 2409, 2449, 2452, 2454, 2457, 2469, 2560, 2562,
|
10030
|
+
2568, 2570, 2581, 2592, 2594, 2600, 2602, 2629, 2641, 2649, 2657, 2661, 2688, 2690, 2693, 2696,
|
10031
|
+
2698, 2709, 2720, 2722, 2728, 2730, 4112, 4113, 4116, 4121, 4132, 4133, 4161, 4164, 4176, 4181,
|
10032
|
+
4184, 4193, 4196, 4197, 4201, 4241, 4244, 4246, 4257, 4261, 4353, 4356, 4358, 4361, 4368, 4370,
|
10033
|
+
4373, 4376, 4385, 4388, 4393, 4421, 4426, 4432, 4433, 4434, 4436, 4437, 4438, 4441, 4448, 4453,
|
10034
|
+
4484, 4498, 4501, 4513, 4516, 4625, 4628, 4630, 4645, 4672, 4678, 4681, 4690, 4693, 4696, 4698,
|
10035
|
+
4708, 4710, 4741, 4753, 4756, 4758, 4773, 5121, 5126, 5129, 5140, 5141, 5144, 5145, 5153, 5158,
|
10036
|
+
5185, 5189, 5190, 5192, 5194, 5201, 5204, 5205, 5206, 5209, 5218, 5221, 5224, 5252, 5257, 5264,
|
10037
|
+
5268, 5269, 5272, 5273, 5274, 5281, 5284, 5285, 5289, 5378, 5381, 5386, 5393, 5396, 5397, 5398,
|
10038
|
+
5401, 5408, 5410, 5413, 5416, 5418, 5441, 5444, 5445, 5446, 5457, 5458, 5460, 5461, 5462, 5465,
|
10039
|
+
5466, 5473, 5476, 5477, 5478, 5481, 5504, 5506, 5508, 5509, 5512, 5514, 5520, 5521, 5524, 5525,
|
10040
|
+
5526, 5529, 5530, 5536, 5538, 5541, 5633, 5636, 5637, 5638, 5653, 5654, 5656, 5658, 5665, 5670,
|
10041
|
+
5696, 5698, 5700, 5701, 5704, 5706, 5713, 5717, 5718, 5720, 5721, 5729, 5732, 5733, 5736, 5737,
|
10042
|
+
5738, 5766, 5770, 5778, 5781, 5796, 5801, 6161, 6166, 6181, 6209, 6212, 6214, 6217, 6224, 6229,
|
10043
|
+
6232, 6234, 6240, 6241, 6244, 6246, 6249, 6277, 6289, 6292, 6309, 6416, 6418, 6421, 6426, 6433,
|
10044
|
+
6437, 6466, 6468, 6469, 6472, 6481, 6484, 6485, 6486, 6489, 6490, 6496, 6501, 6506, 6537, 6545,
|
10045
|
+
6546, 6549, 6552, 6561, 6566, 6569, 6665, 6678, 6692, 6694, 6724, 6726, 6729, 6736, 6738, 6741,
|
10046
|
+
6744, 6753, 6758, 6761, 6789, 6801, 6806, 6810, 8192, 8194, 8200, 8202, 8213, 8224, 8226, 8229,
|
10047
|
+
8232, 8234, 8261, 8273, 8281, 8289, 8293, 8320, 8322, 8328, 8330, 8341, 8352, 8354, 8357, 8360,
|
10048
|
+
8362, 8453, 8465, 8468, 8473, 8485, 8514, 8516, 8521, 8533, 8536, 8538, 8545, 8548, 8549, 8550,
|
10049
|
+
8581, 8592, 8598, 8601, 8613, 8705, 8712, 8714, 8721, 8725, 8736, 8738, 8744, 8746, 8773, 8785,
|
10050
|
+
8790, 8793, 8805, 8833, 8840, 8842, 8849, 8853, 8864, 8866, 8872, 8874, 9221, 9236, 9238, 9241,
|
10051
|
+
9253, 9284, 9285, 9286, 9289, 9298, 9301, 9304, 9306, 9318, 9349, 9361, 9364, 9369, 9377, 9381,
|
10052
|
+
9481, 9493, 9505, 9513, 9536, 9541, 9544, 9553, 9556, 9557, 9561, 9570, 9573, 9576, 9609, 9616,
|
10053
|
+
9620, 9621, 9624, 9626, 9633, 9636, 9638, 9641, 9733, 9744, 9746, 9753, 9765, 9793, 9801, 9813,
|
10054
|
+
9824, 9825, 9833, 9860, 9862, 9872, 9882, 10240, 10242, 10248, 10250, 10261, 10272, 10274, 10280, 10282,
|
10055
|
+
10309, 10321, 10324, 10341, 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, 10505, 10513, 10516, 10521,
|
10056
|
+
10533, 10566, 10569, 10578, 10581, 10593, 10596, 10598, 10601, 10629, 10640, 10646, 10649, 10660, 10661, 10752,
|
10057
|
+
10754, 10760, 10762, 10784, 10786, 10792, 10794, 10821, 10833, 10838, 10841, 10853, 10880, 10882, 10888, 10890,
|
10058
|
+
10901, 10912, 10914, 10920, 10922, 16389, 16401, 16406, 16421, 16457, 16466, 16469, 16472, 16474, 16481, 16484,
|
10059
|
+
16486, 16532, 16537, 16545, 16550, 16640, 16641, 16644, 16646, 16649, 16658, 16661, 16662, 16664, 16666, 16673,
|
10060
|
+
16678, 16681, 16709, 16712, 16714, 16721, 16724, 16725, 16726, 16729, 16730, 16741, 16744, 16746, 16769, 16772,
|
10061
|
+
16774, 16784, 16786, 16789, 16800, 16801, 16802, 16901, 16913, 16916, 16918, 16933, 16961, 16978, 16981, 16986,
|
10062
|
+
16996, 17001, 17033, 17044, 17061, 17409, 17429, 17433, 17449, 17477, 17480, 17482, 17489, 17492, 17493, 17494,
|
10063
|
+
17505, 17506, 17509, 17512, 17514, 17537, 17542, 17545, 17552, 17554, 17557, 17568, 17569, 17577, 17665, 17666,
|
10064
|
+
17669, 17674, 17681, 17684, 17685, 17686, 17689, 17696, 17701, 17706, 17729, 17732, 17733, 17734, 17737, 17744,
|
10065
|
+
17745, 17748, 17749, 17750, 17752, 17753, 17761, 17764, 17765, 17766, 17769, 17794, 17796, 17797, 17800, 17809,
|
10066
|
+
17812, 17813, 17814, 17817, 17818, 17829, 17832, 17834, 17921, 17925, 17929, 17940, 17941, 17944, 17946, 17953,
|
10067
|
+
17956, 17961, 17984, 17986, 17989, 17992, 18000, 18001, 18002, 18005, 18006, 18009, 18018, 18021, 18024, 18049,
|
10068
|
+
18053, 18058, 18068, 18069, 18081, 18084, 18086, 18437, 18449, 18453, 18458, 18469, 18498, 18505, 18512, 18517,
|
10069
|
+
18520, 18529, 18532, 18534, 18537, 18565, 18577, 18580, 18582, 18585, 18597, 18689, 18693, 18694, 18698, 18704,
|
10070
|
+
18708, 18709, 18712, 18721, 18724, 18726, 18752, 18757, 18762, 18769, 18770, 18772, 18773, 18774, 18777, 18784,
|
10071
|
+
18786, 18789, 18790, 18794, 18822, 18825, 18834, 18837, 18838, 18840, 18849, 18852, 18854, 18857, 18966, 19012,
|
10072
|
+
19014, 19017, 19029, 19032, 19034, 19044, 19049, 19092, 19109, 20481, 20484, 20485, 20486, 20489, 20498, 20501,
|
10073
|
+
20506, 20513, 20516, 20521, 20544, 20549, 20552, 20561, 20564, 20565, 20566, 20569, 20581, 20584, 20614, 20617,
|
10074
|
+
20629, 20632, 20640, 20641, 20646, 20649, 20741, 20744, 20745, 20746, 20753, 20756, 20757, 20758, 20760, 20761,
|
10075
|
+
20768, 20773, 20774, 20776, 20778, 20801, 20804, 20805, 20806, 20809, 20816, 20817, 20818, 20820, 20821, 20822,
|
10076
|
+
20824, 20825, 20826, 20833, 20836, 20837, 20838, 20841, 20866, 20869, 20881, 20884, 20885, 20886, 20889, 20896,
|
10077
|
+
20901, 20906, 20993, 20998, 21010, 21013, 21018, 21025, 21028, 21058, 21061, 21066, 21073, 21076, 21077, 21078,
|
10078
|
+
21081, 21090, 21093, 21125, 21136, 21138, 21141, 21145, 21146, 21156, 21508, 21509, 21521, 21524, 21525, 21526,
|
10079
|
+
21528, 21529, 21537, 21541, 21544, 21546, 21569, 21572, 21573, 21574, 21577, 21578, 21584, 21585, 21588, 21589,
|
10080
|
+
21590, 21592, 21593, 21594, 21601, 21602, 21604, 21605, 21606, 21609, 21632, 21640, 21642, 21649, 21652, 21653,
|
10081
|
+
21654, 21657, 21665, 21668, 21669, 21674, 21761, 21762, 21764, 21765, 21766, 21769, 21776, 21777, 21778, 21780,
|
10082
|
+
21781, 21782, 21785, 21786, 21793, 21796, 21797, 21798, 21801, 21824, 21825, 21826, 21828, 21829, 21830, 21832,
|
10083
|
+
21833, 21840, 21841, 21842, 21844, 21845, 21846, 21848, 21849, 21850, 21856, 21857, 21860, 21861, 21862, 21864,
|
10084
|
+
21865, 21866, 21889, 21892, 21893, 21897, 21898, 21904, 21905, 21908, 21909, 21910, 21912, 21913, 21921, 21924,
|
10085
|
+
21925, 21926, 21929, 22016, 22017, 22018, 22020, 22022, 22024, 22025, 22033, 22036, 22037, 22040, 22041, 22048,
|
10086
|
+
22049, 22050, 22052, 22053, 22054, 22056, 22057, 22081, 22085, 22086, 22088, 22089, 22090, 22096, 22097, 22098,
|
10087
|
+
22100, 22101, 22102, 22104, 22105, 22106, 22113, 22116, 22117, 22121, 22146, 22149, 22150, 22152, 22153, 22154,
|
10088
|
+
22161, 22165, 22170, 22178, 22181, 22182, 22184, 22185, 22532, 22533, 22534, 22537, 22544, 22549, 22552, 22561,
|
10089
|
+
22570, 22597, 22600, 22602, 22609, 22612, 22613, 22614, 22616, 22617, 22624, 22626, 22628, 22629, 22658, 22665,
|
10090
|
+
22672, 22674, 22677, 22680, 22689, 22697, 22785, 22786, 22789, 22794, 22801, 22804, 22805, 22806, 22809, 22821,
|
10091
|
+
22849, 22852, 22853, 22854, 22857, 22864, 22865, 22866, 22868, 22869, 22870, 22872, 22873, 22874, 22881, 22884,
|
10092
|
+
22885, 22886, 22889, 22913, 22917, 22921, 22929, 22932, 22933, 22934, 22936, 22937, 22949, 23044, 23048, 23061,
|
10093
|
+
23066, 23072, 23077, 23078, 23081, 23109, 23112, 23113, 23121, 23125, 23126, 23128, 23129, 23138, 23141, 23144,
|
10094
|
+
23146, 23169, 23178, 23186, 23189, 23190, 23192, 23194, 23201, 24581, 24596, 24598, 24601, 24613, 24644, 24656,
|
10095
|
+
24661, 24662, 24664, 24666, 24673, 24676, 24678, 24681, 24705, 24726, 24741, 24833, 24836, 24838, 24841, 24850,
|
10096
|
+
24853, 24865, 24866, 24870, 24873, 24901, 24905, 24913, 24917, 24918, 24921, 24933, 24934, 24938, 24964, 24970,
|
10097
|
+
24978, 24981, 24993, 24998, 25001, 25105, 25110, 25113, 25152, 25153, 25158, 25173, 25174, 25176, 25184, 25221,
|
10098
|
+
25233, 25238, 25253, 25617, 25618, 25621, 25622, 25626, 25633, 25638, 25641, 25664, 25666, 25669, 25672, 25674,
|
10099
|
+
25681, 25684, 25685, 25686, 25689, 25690, 25696, 25698, 25701, 25732, 25733, 25737, 25744, 25746, 25748, 25749,
|
10100
|
+
25750, 25752, 25754, 25761, 25764, 25769, 25861, 25864, 25866, 25873, 25877, 25878, 25881, 25924, 25925, 25926,
|
10101
|
+
25929, 25936, 25937, 25940, 25941, 25942, 25945, 25953, 25956, 25957, 25958, 25961, 25990, 25993, 25994, 26001,
|
10102
|
+
26005, 26006, 26009, 26010, 26018, 26021, 26022, 26024, 26114, 26121, 26133, 26144, 26150, 26152, 26153, 26176,
|
10103
|
+
26181, 26184, 26186, 26193, 26196, 26197, 26198, 26200, 26202, 26208, 26213, 26216, 26240, 26242, 26245, 26250,
|
10104
|
+
26260, 26262, 26264, 26265, 26272, 26276, 26278, 26282, 26646, 26649, 26661, 26689, 26706, 26709, 26714, 26721,
|
10105
|
+
26729, 26757, 26769, 26776, 26790, 26881, 26884, 26896, 26901, 26913, 26916, 26918, 26921, 26944, 26945, 26949,
|
10106
|
+
26950, 26952, 26961, 26964, 26965, 26966, 26969, 26976, 26981, 26986, 27010, 27012, 27018, 27029, 27041, 27044,
|
10107
|
+
27045, 27049, 27153, 27158, 27160, 27201, 27204, 27209, 27216, 27221, 27224, 27226, 27236, 27237, 27241, 27270,
|
10108
|
+
27284, 27288, 27290, 27302, 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, 32837, 32848, 32849, 32852,
|
10109
|
+
32854, 32857, 32869, 32896, 32898, 32904, 32906, 32917, 32928, 32930, 32936, 32938, 33029, 33041, 33044, 33046,
|
10110
|
+
33049, 33061, 33089, 33092, 33097, 33104, 33106, 33109, 33110, 33112, 33113, 33124, 33126, 33129, 33157, 33161,
|
10111
|
+
33172, 33174, 33177, 33189, 33280, 33282, 33288, 33290, 33301, 33312, 33314, 33320, 33322, 33361, 33364, 33369,
|
10112
|
+
33381, 33408, 33410, 33416, 33418, 33429, 33440, 33442, 33448, 33450, 33812, 33817, 33857, 33860, 33873, 33877,
|
10113
|
+
33882, 33889, 33892, 33897, 33940, 33945, 34049, 34057, 34066, 34069, 34074, 34086, 34089, 34112, 34113, 34117,
|
10114
|
+
34120, 34129, 34132, 34133, 34134, 34137, 34138, 34149, 34150, 34152, 34154, 34177, 34180, 34182, 34185, 34192,
|
10115
|
+
34194, 34197, 34200, 34214, 34321, 34326, 34329, 34341, 34369, 34372, 34377, 34378, 34384, 34389, 34393, 34394,
|
10116
|
+
34401, 34406, 34410, 34437, 34449, 34458, 34468, 34816, 34818, 34824, 34826, 34837, 34848, 34850, 34856, 34858,
|
10117
|
+
34881, 34885, 34897, 34900, 34905, 34917, 34921, 34944, 34946, 34952, 34954, 34965, 34976, 34978, 34984, 34986,
|
10118
|
+
35077, 35078, 35089, 35092, 35094, 35109, 35137, 35140, 35142, 35145, 35152, 35154, 35157, 35162, 35169, 35172,
|
10119
|
+
35205, 35222, 35225, 35237, 35328, 35330, 35336, 35338, 35349, 35360, 35362, 35368, 35370, 35397, 35409, 35412,
|
10120
|
+
35414, 35456, 35458, 35464, 35466, 35477, 35488, 35490, 35496, 35498, 36869, 36881, 36886, 36888, 36889, 36901,
|
10121
|
+
36929, 36934, 36937, 36949, 36952, 36954, 36969, 36970, 36997, 37009, 37012, 37014, 37017, 37029, 37121, 37124,
|
10122
|
+
37126, 37129, 37136, 37141, 37144, 37146, 37153, 37156, 37158, 37161, 37184, 37189, 37200, 37201, 37204, 37205,
|
10123
|
+
37206, 37209, 37218, 37221, 37252, 37254, 37266, 37269, 37272, 37281, 37284, 37286, 37289, 37381, 37393, 37396,
|
10124
|
+
37401, 37413, 37444, 37446, 37449, 37456, 37458, 37461, 37464, 37478, 37481, 37509, 37524, 37526, 37545, 37889,
|
10125
|
+
37892, 37894, 37904, 37909, 37912, 37926, 37952, 37962, 37969, 37972, 37973, 37974, 37976, 37977, 37984, 37985,
|
10126
|
+
37986, 37989, 38020, 38022, 38034, 38036, 38037, 38040, 38049, 38057, 38144, 38149, 38152, 38154, 38160, 38161,
|
10127
|
+
38164, 38165, 38166, 38169, 38177, 38181, 38185, 38186, 38209, 38212, 38213, 38214, 38217, 38224, 38225, 38226,
|
10128
|
+
38228, 38229, 38230, 38232, 38233, 38234, 38241, 38244, 38245, 38246, 38249, 38273, 38277, 38280, 38289, 38290,
|
10129
|
+
38292, 38293, 38294, 38297, 38298, 38304, 38306, 38309, 38312, 38314, 38401, 38404, 38416, 38421, 38425, 38432,
|
10130
|
+
38438, 38441, 38469, 38472, 38473, 38481, 38482, 38485, 38486, 38489, 38501, 38504, 38530, 38532, 38537, 38538,
|
10131
|
+
38546, 38548, 38549, 38564, 38566, 38569, 38917, 38934, 38937, 38949, 38977, 38982, 38992, 38994, 38997, 38998,
|
10132
|
+
39002, 39012, 39013, 39045, 39057, 39062, 39065, 39077, 39172, 39174, 39177, 39184, 39186, 39189, 39192, 39194,
|
10133
|
+
39200, 39201, 39204, 39206, 39232, 39234, 39237, 39240, 39242, 39249, 39252, 39253, 39254, 39257, 39266, 39269,
|
10134
|
+
39270, 39274, 39297, 39300, 39312, 39314, 39317, 39322, 39329, 39334, 39429, 39445, 39461, 39492, 39494, 39497,
|
10135
|
+
39504, 39509, 39512, 39521, 39557, 39569, 39572, 39573, 39574, 40960, 40962, 40968, 40970, 40981, 40992, 40994,
|
10136
|
+
41000, 41002, 41029, 41041, 41044, 41046, 41049, 41088, 41090, 41096, 41098, 41109, 41120, 41122, 41128, 41130,
|
10137
|
+
41221, 41225, 41233, 41236, 41238, 41241, 41242, 41286, 41289, 41297, 41301, 41304, 41306, 41313, 41316, 41349,
|
10138
|
+
41360, 41362, 41366, 41369, 41474, 41480, 41482, 41488, 41497, 41506, 41512, 41514, 41541, 41553, 41558, 41561,
|
10139
|
+
41573, 41600, 41602, 41608, 41610, 41621, 41632, 41634, 41640, 41642, 42009, 42021, 42049, 42052, 42064, 42068,
|
10140
|
+
42069, 42072, 42074, 42081, 42085, 42086, 42088, 42089, 42117, 42246, 42249, 42256, 42258, 42261, 42264, 42278,
|
10141
|
+
42281, 42306, 42309, 42321, 42324, 42325, 42326, 42329, 42341, 42346, 42369, 42372, 42373, 42374, 42377, 42386,
|
10142
|
+
42389, 42392, 42501, 42513, 42518, 42522, 42529, 42533, 42564, 42566, 42570, 42578, 42581, 42582, 42584, 42592,
|
10143
|
+
42594, 42630, 42640, 42645, 42646, 42649, 42657, 42660, 42662, 43008, 43010, 43016, 43018, 43040, 43042, 43048,
|
10144
|
+
43050, 43089, 43092, 43094, 43097, 43136, 43138, 43144, 43146, 43157, 43168, 43170, 43176, 43178, 43269, 43284,
|
10145
|
+
43289, 43297, 43301, 43329, 43344, 43349, 43354, 43361, 43366, 43369, 43408, 43414, 43520, 43522, 43528, 43530,
|
10146
|
+
43552, 43554, 43560, 43562, 43601, 43604, 43606, 43648, 43650, 43656, 43658, 43669, 43680, 43682, 43688, 43690,
|
10847
10147
|
};
|
10848
10148
|
static const uint16_t kgrid_2bit_1024[1024] = {
|
10849
10149
|
0, 2, 5, 8, 10, 17, 20, 22, 25, 32, 34, 37, 40, 65, 68, 70,
|
@@ -10917,12 +10217,12 @@ void iq2xs_init_impl(enum ggml_type type) {
|
|
10917
10217
|
const int nwant = type == GGML_TYPE_IQ1_S ? 3 : type == GGML_TYPE_IQ2_S ? 1 : 2;
|
10918
10218
|
const uint16_t * kgrid = type == GGML_TYPE_IQ2_XXS ? kgrid_2bit_256 :
|
10919
10219
|
type == GGML_TYPE_IQ2_XS ? kgrid_2bit_512 :
|
10920
|
-
type == GGML_TYPE_IQ1_S ?
|
10220
|
+
type == GGML_TYPE_IQ1_S ? kgrid_1bit_2048 : kgrid_2bit_1024;
|
10921
10221
|
uint64_t * kgrid_q2xs;
|
10922
10222
|
int * kmap_q2xs;
|
10923
10223
|
uint16_t * kneighbors_q2xs;
|
10924
10224
|
|
10925
|
-
printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
|
10225
|
+
//printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
|
10926
10226
|
uint64_t * the_grid = (uint64_t *)malloc(grid_size*sizeof(uint64_t));
|
10927
10227
|
for (int k = 0; k < grid_size; ++k) {
|
10928
10228
|
int8_t * pos = (int8_t *)(the_grid + k);
|
@@ -10977,7 +10277,7 @@ void iq2xs_init_impl(enum ggml_type type) {
|
|
10977
10277
|
}
|
10978
10278
|
num_neighbors += n;
|
10979
10279
|
}
|
10980
|
-
printf("%s: %d neighbours in total\n", __func__, num_neighbors);
|
10280
|
+
//printf("%s: %d neighbours in total\n", __func__, num_neighbors);
|
10981
10281
|
kneighbors_q2xs = (uint16_t *)malloc((num_neighbors + num_not_in_map)*sizeof(uint16_t));
|
10982
10282
|
iq2_data[gindex].neighbours = kneighbors_q2xs;
|
10983
10283
|
int counter = 0;
|
@@ -11400,8 +10700,7 @@ static void quantize_row_iq2_xs_impl(const float * restrict x, void * restrict v
|
|
11400
10700
|
}
|
11401
10701
|
}
|
11402
10702
|
|
11403
|
-
size_t quantize_iq2_xxs(const float * src, void * dst, int nrow, int n_per_row,
|
11404
|
-
(void)hist;
|
10703
|
+
size_t quantize_iq2_xxs(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
11405
10704
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11406
10705
|
int nblock = n_per_row/QK_K;
|
11407
10706
|
char * qrow = (char *)dst;
|
@@ -11413,8 +10712,7 @@ size_t quantize_iq2_xxs(const float * src, void * dst, int nrow, int n_per_row,
|
|
11413
10712
|
return nrow * nblock * sizeof(block_iq2_xxs);
|
11414
10713
|
}
|
11415
10714
|
|
11416
|
-
size_t quantize_iq2_xs(const float * src, void * dst, int nrow, int n_per_row,
|
11417
|
-
(void)hist;
|
10715
|
+
size_t quantize_iq2_xs(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
11418
10716
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11419
10717
|
int nblock = n_per_row/QK_K;
|
11420
10718
|
char * qrow = (char *)dst;
|
@@ -11518,7 +10816,7 @@ void iq3xs_init_impl(int grid_size) {
|
|
11518
10816
|
int * kmap_q3xs;
|
11519
10817
|
uint16_t * kneighbors_q3xs;
|
11520
10818
|
|
11521
|
-
printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
|
10819
|
+
//printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
|
11522
10820
|
uint32_t * the_grid = (uint32_t *)malloc(grid_size*sizeof(uint32_t));
|
11523
10821
|
for (int k = 0; k < grid_size; ++k) {
|
11524
10822
|
int8_t * pos = (int8_t *)(the_grid + k);
|
@@ -11573,7 +10871,7 @@ void iq3xs_init_impl(int grid_size) {
|
|
11573
10871
|
}
|
11574
10872
|
num_neighbors += n;
|
11575
10873
|
}
|
11576
|
-
printf("%s: %d neighbours in total\n", __func__, num_neighbors);
|
10874
|
+
//printf("%s: %d neighbours in total\n", __func__, num_neighbors);
|
11577
10875
|
kneighbors_q3xs = (uint16_t *)malloc((num_neighbors + num_not_in_map)*sizeof(uint16_t));
|
11578
10876
|
iq3_data[gindex].neighbours = kneighbors_q3xs;
|
11579
10877
|
int counter = 0;
|
@@ -11856,8 +11154,7 @@ static void quantize_row_iq3_xxs_impl(int grid_size, const float * restrict x, v
|
|
11856
11154
|
}
|
11857
11155
|
}
|
11858
11156
|
|
11859
|
-
size_t quantize_iq3_xxs(const float * src, void * dst, int nrow, int n_per_row,
|
11860
|
-
(void)hist;
|
11157
|
+
size_t quantize_iq3_xxs(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
11861
11158
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
11862
11159
|
int nblock = n_per_row/QK_K;
|
11863
11160
|
char * qrow = (char *)dst;
|
@@ -12063,8 +11360,7 @@ static void quantize_row_iq3_s_impl(int block_size, const float * restrict x, vo
|
|
12063
11360
|
}
|
12064
11361
|
|
12065
11362
|
#define IQ3S_BLOCK_SIZE 32
|
12066
|
-
size_t quantize_iq3_s(const float * src, void * dst, int nrow, int n_per_row,
|
12067
|
-
(void)hist;
|
11363
|
+
size_t quantize_iq3_s(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
12068
11364
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12069
11365
|
int nblock = n_per_row/QK_K;
|
12070
11366
|
float scales[QK_K/IQ3S_BLOCK_SIZE];
|
@@ -12094,7 +11390,7 @@ void quantize_row_iq3_s(const float * restrict x, void * restrict vy, int k) {
|
|
12094
11390
|
|
12095
11391
|
void quantize_row_iq3_s_reference(const float * restrict x, block_iq3_s * restrict y, int k) {
|
12096
11392
|
assert(k % QK_K == 0);
|
12097
|
-
quantize_iq3_s(x, y, 1, k, NULL
|
11393
|
+
quantize_iq3_s(x, y, 1, k, NULL);
|
12098
11394
|
}
|
12099
11395
|
|
12100
11396
|
|
@@ -12160,12 +11456,70 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
|
12160
11456
|
return grid_index;
|
12161
11457
|
}
|
12162
11458
|
|
11459
|
+
// Pick the grid point that best approximates one group of 8 values for a fixed scale.
//
// The score of a grid point is the weighted squared error
//     sum_i weight[i] * (scale*q_i - xval[i])^2
// where q_i = xg[(v_i - 1)/2] and v_i is the i-th int8_t of the grid entry.
//
//   neighbours  neighbours[0] is the candidate count; neighbours[1..count] are grid indices
//   grid        grid table; every uint64_t entry is read as 8 int8_t values
//   xval        the 8 values being approximated
//   weight      the 8 weights applied to the squared error
//   scale       multiplier applied to the xg value before comparing against xval
//   xg          lookup table of quant values, indexed by (v - 1)/2
//   L           output: receives (v - 1)/2 for each of the 8 bytes of the chosen grid point
//   ngrid       number of grid entries scanned by the fallback search
//
// Returns the index of the chosen grid point; asserts that one was found.
static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
        const float * restrict xval, const float * restrict weight, float scale, const float * restrict xg, int8_t * restrict L, int ngrid) {
    int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);
    float best_score = FLT_MAX;
    int grid_index = -1;
    // First pass: only the precomputed neighbour candidates.
    for (int j = 1; j <= num_neighbors; ++j) {
        const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
        float d2 = 0;
        for (int i = 0; i < 8; ++i) {
            float q = xg[(pg[i] - 1)/2];
            float w = weight[i];
            float diff = scale*q - xval[i];
            d2 += w*diff*diff;
        }
        if (d2 < best_score) {
            best_score = d2;
            grid_index = neighbours[j];
        }
    }
    if (grid_index < 0) {
        // No neighbour scored below FLT_MAX: fall back to scanning the whole grid.
        for (int i = 0; i < ngrid; ++i) {
            const int8_t * grid_i = (const int8_t *)(grid + i);
            float d2 = 0;
            for (int j = 0; j < 8; ++j) {
                float w = weight[j];
                float q = xg[(grid_i[j] - 1)/2];
                // Index the input by the element counter j, not the grid counter i:
                // xval only holds 8 values, so xval[i] would read past the group.
                float diff = scale*q - xval[j];
                d2 += w*diff*diff;
            }
            if (d2 < best_score) {
                best_score = d2;
                grid_index = i;
            }
        }
    }
    if (grid_index < 0) {
        // Still nothing usable: dump per-neighbour sums before the assert below fires.
        printf("Oops, did not find grid point\n");
        printf("Have %d neighbours\n", num_neighbors);
        for (int j = 1; j <= num_neighbors; ++j) {
            const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
            float sumqx = 0, sumq2 = 0;
            for (int i = 0; i < 8; ++i) {
                float q = xg[(pg[i] - 1)/2];
                float w = weight[i];
                sumqx += w*q*xval[i];
                sumq2 += w*q*q;
            }
            printf("    neighbour %d: sumqx = %g sumq2 = %g\n", j, (double)sumqx, (double)sumq2);
        }
    }
    GGML_ASSERT(grid_index >= 0);
    const int8_t * pg = (const int8_t *)(grid + grid_index);
    for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2;
    return grid_index;
}
|
11515
|
+
|
12163
11516
|
// qsort-style comparator: orders two elements by the float stored at the start of each.
// Only that leading float is read, so any payload that follows it in the element is ignored.
// Returns -1 if left sorts first, 1 if right sorts first, and 0 otherwise
// (including when the two values do not compare as ordered).
static int iq1_sort_helper(const void * left, const void * right) {
    const float lhs = *(const float *)left;
    const float rhs = *(const float *)right;
    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
|
12168
11521
|
|
11522
|
+
#define IQ1S_BLOCK_SIZE 32
|
12169
11523
|
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int n, const float * restrict quant_weights) {
|
12170
11524
|
|
12171
11525
|
const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
|
@@ -12184,37 +11538,41 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
12184
11538
|
|
12185
11539
|
block_iq1_s * y = vy;
|
12186
11540
|
|
12187
|
-
float
|
12188
|
-
float
|
12189
|
-
|
12190
|
-
float
|
12191
|
-
float
|
12192
|
-
|
11541
|
+
const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA};
|
11542
|
+
const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
|
11543
|
+
|
11544
|
+
float scales[QK_K/IQ1S_BLOCK_SIZE];
|
11545
|
+
float weight[IQ1S_BLOCK_SIZE];
|
11546
|
+
int8_t L[IQ1S_BLOCK_SIZE];
|
11547
|
+
float sumx[IQ1S_BLOCK_SIZE+1];
|
11548
|
+
float sumw[IQ1S_BLOCK_SIZE+1];
|
11549
|
+
float pairs[2*IQ1S_BLOCK_SIZE];
|
12193
11550
|
int * idx = (int *)(pairs + 1);
|
12194
|
-
|
11551
|
+
uint16_t index[IQ1S_BLOCK_SIZE/8];
|
11552
|
+
int8_t shifts[QK_K/IQ1S_BLOCK_SIZE];
|
12195
11553
|
|
12196
11554
|
for (int ibl = 0; ibl < nbl; ++ibl) {
|
12197
11555
|
|
12198
11556
|
y[ibl].d = GGML_FP32_TO_FP16(0.f);
|
12199
11557
|
memset(y[ibl].qs, 0, QK_K/8);
|
12200
|
-
memset(y[ibl].
|
11558
|
+
memset(y[ibl].qh, 0, QK_K/16);
|
12201
11559
|
|
12202
11560
|
float max_scale = 0;
|
12203
11561
|
|
12204
11562
|
const float * xbl = x + QK_K*ibl;
|
12205
11563
|
float sumx2 = 0;
|
12206
11564
|
for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
|
12207
|
-
float sigma2 = sumx2/QK_K;
|
11565
|
+
float sigma2 = 2*sumx2/QK_K;
|
12208
11566
|
|
12209
|
-
for (int ib = 0; ib < QK_K/
|
12210
|
-
const float * xb = xbl +
|
12211
|
-
const float * qw = quant_weights + QK_K*ibl +
|
12212
|
-
for (int i = 0; i <
|
11567
|
+
for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) {
|
11568
|
+
const float * xb = xbl + IQ1S_BLOCK_SIZE*ib;
|
11569
|
+
const float * qw = quant_weights + QK_K*ibl + IQ1S_BLOCK_SIZE*ib;
|
11570
|
+
for (int i = 0; i < IQ1S_BLOCK_SIZE; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
|
12213
11571
|
float max = fabsf(xb[0]);
|
12214
|
-
for (int i = 1; i <
|
11572
|
+
for (int i = 1; i < IQ1S_BLOCK_SIZE; ++i) max = MAX(max, fabsf(xb[i]));
|
12215
11573
|
if (!max) {
|
12216
11574
|
scales[ib] = 0;
|
12217
|
-
memset(L, 1,
|
11575
|
+
memset(L, 1, IQ1S_BLOCK_SIZE);
|
12218
11576
|
continue;
|
12219
11577
|
}
|
12220
11578
|
// Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
|
@@ -12223,52 +11581,81 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
12223
11581
|
// in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
|
12224
11582
|
// Wi = sum[weight[j], j = 0...i], and use these to quickly get the optimum scale
|
12225
11583
|
// and score for each possible split.
|
12226
|
-
for (int j = 0; j <
|
11584
|
+
for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) {
|
12227
11585
|
pairs[2*j] = xb[j];
|
12228
11586
|
idx[2*j] = j;
|
12229
11587
|
}
|
12230
|
-
qsort(pairs,
|
11588
|
+
qsort(pairs, IQ1S_BLOCK_SIZE, 2*sizeof(float), iq1_sort_helper);
|
12231
11589
|
{
|
12232
11590
|
sumx[0] = sumw[0] = 0;
|
12233
|
-
for (int j = 0; j <
|
11591
|
+
for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) {
|
12234
11592
|
int i = idx[2*j];
|
12235
11593
|
sumx[j+1] = sumx[j] + weight[i]*xb[i];
|
12236
11594
|
sumw[j+1] = sumw[j] + weight[i];
|
12237
11595
|
}
|
12238
11596
|
}
|
12239
11597
|
float best_score = 0, scale = max;
|
12240
|
-
int besti1 =
|
12241
|
-
for (int i1 = 0; i1 <=
|
12242
|
-
for (int i2 = i1; i2 <=
|
12243
|
-
float sumqx =
|
12244
|
-
float sumq2 =
|
11598
|
+
int besti1 = -1, besti2 = -1, best_shift = 0;
|
11599
|
+
for (int i1 = 0; i1 <= IQ1S_BLOCK_SIZE; ++i1) {
|
11600
|
+
for (int i2 = i1; i2 <= IQ1S_BLOCK_SIZE; ++i2) {
|
11601
|
+
float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2])*x_p[2];
|
11602
|
+
float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2])*x_p[2]*x_p[2];
|
12245
11603
|
if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
|
12246
11604
|
scale = sumqx/sumq2; best_score = scale*sumqx;
|
12247
|
-
besti1 = i1; besti2 = i2;
|
11605
|
+
besti1 = i1; besti2 = i2; best_shift = 1;
|
11606
|
+
}
|
11607
|
+
sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2])*x_m[2];
|
11608
|
+
sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2])*x_m[2]*x_m[2];
|
11609
|
+
if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
|
11610
|
+
scale = sumqx/sumq2; best_score = scale*sumqx;
|
11611
|
+
besti1 = i1; besti2 = i2; best_shift = -1;
|
12248
11612
|
}
|
12249
11613
|
}
|
12250
11614
|
}
|
11615
|
+
GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
|
12251
11616
|
for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
|
12252
11617
|
for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
|
12253
|
-
for (int j = besti2; j <
|
11618
|
+
for (int j = besti2; j < IQ1S_BLOCK_SIZE; ++j) L[idx[2*j]] = 2;
|
12254
11619
|
if (scale < 0) {
|
12255
|
-
for (int j = 0; j <
|
12256
|
-
scale = -scale;
|
11620
|
+
for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) L[j] = 2 - L[j];
|
11621
|
+
scale = -scale; best_shift = -best_shift;
|
11622
|
+
}
|
11623
|
+
bool all_on_grid = true;
|
11624
|
+
const float * xx = best_shift == 1 ? x_p : x_m;
|
11625
|
+
for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) {
|
11626
|
+
uint16_t u = 0;
|
11627
|
+
for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
|
11628
|
+
int grid_index = kmap_q2xs[u];
|
11629
|
+
if (grid_index < 0) {
|
11630
|
+
all_on_grid = false;
|
11631
|
+
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
|
11632
|
+
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
|
11633
|
+
GGML_ASSERT(grid_index >= 0);
|
11634
|
+
}
|
11635
|
+
index[k] = grid_index;
|
11636
|
+
}
|
11637
|
+
if (!all_on_grid) {
|
11638
|
+
float sumqx = 0, sumq2 = 0;
|
11639
|
+
for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) {
|
11640
|
+
const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
|
11641
|
+
for (int j = 0; j < 8; ++j) {
|
11642
|
+
float w = weight[8*k + j];
|
11643
|
+
float q = xx[(pg[j] - 1)/2];
|
11644
|
+
sumqx += w*q*xb[8*k+j];
|
11645
|
+
sumq2 += w*q*q;
|
11646
|
+
}
|
11647
|
+
}
|
11648
|
+
if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
|
11649
|
+
}
|
11650
|
+
uint16_t h = 0;
|
11651
|
+
for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) {
|
11652
|
+
y[ibl].qs[(IQ1S_BLOCK_SIZE/8)*ib + k] = index[k] & 255;
|
11653
|
+
h |= (index[k] >> 8) << 3*k;
|
12257
11654
|
}
|
12258
|
-
|
12259
|
-
// grid point that minimizes SSD.
|
12260
|
-
uint16_t u = 0;
|
12261
|
-
for (int j = 0; j < 8; ++j) u |= (L[j] << 2*j);
|
12262
|
-
int grid_index = kmap_q2xs[u];
|
12263
|
-
if (grid_index < 0) {
|
12264
|
-
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
|
12265
|
-
grid_index = iq1_find_best_neighbour(neighbours, kgrid_q2xs, xb, weight, &scale, L, NGRID_IQ2XXS);
|
12266
|
-
GGML_ASSERT(grid_index >= 0);
|
12267
|
-
}
|
12268
|
-
y[ibl].qs[ib] = grid_index & 255;
|
12269
|
-
hbit[ib] = grid_index >> 8;
|
11655
|
+
y[ibl].qh[ib] = h;
|
12270
11656
|
GGML_ASSERT(scale >= 0);
|
12271
11657
|
scales[ib] = scale;
|
11658
|
+
shifts[ib] = best_shift;
|
12272
11659
|
max_scale = MAX(max_scale, scale);
|
12273
11660
|
}
|
12274
11661
|
|
@@ -12278,19 +11665,18 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
|
12278
11665
|
}
|
12279
11666
|
|
12280
11667
|
float d = max_scale/15;
|
12281
|
-
y[ibl].d = GGML_FP32_TO_FP16(d*1.
|
11668
|
+
y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed.
|
12282
11669
|
float id = 1/d;
|
12283
|
-
for (int ib = 0; ib < QK_K/
|
11670
|
+
for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) {
|
12284
11671
|
int l = nearest_int(0.5f*(id*scales[ib]-1));
|
12285
11672
|
l = MAX(0, MIN(7, l));
|
12286
|
-
if (
|
12287
|
-
y[ibl].
|
11673
|
+
if (shifts[ib] == -1) l |= 8;
|
11674
|
+
y[ibl].qh[ib] |= (l << 12);
|
12288
11675
|
}
|
12289
11676
|
}
|
12290
11677
|
}
|
12291
11678
|
|
12292
|
-
size_t quantize_iq1_s(const float * src, void * dst, int nrow, int n_per_row,
|
12293
|
-
(void)hist;
|
11679
|
+
size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
12294
11680
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12295
11681
|
int nblock = n_per_row/QK_K;
|
12296
11682
|
char * qrow = (char *)dst;
|
@@ -12315,7 +11701,7 @@ static inline int best_index_int8(int n, const int8_t * val, float x) {
|
|
12315
11701
|
return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
|
12316
11702
|
}
|
12317
11703
|
|
12318
|
-
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float *
|
11704
|
+
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
|
12319
11705
|
ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
|
12320
11706
|
float * scales, float * weight, uint8_t * L,
|
12321
11707
|
const int8_t * values,
|
@@ -12423,8 +11809,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
|
|
12423
11809
|
}
|
12424
11810
|
}
|
12425
11811
|
|
12426
|
-
size_t quantize_iq4_nl(const float * src, void * dst, int nrow, int n_per_row,
|
12427
|
-
(void)hist;
|
11812
|
+
size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
12428
11813
|
GGML_ASSERT(n_per_row%QK4_NL == 0);
|
12429
11814
|
int nblock = n_per_row/QK4_NL;
|
12430
11815
|
char * qrow = (char *)dst;
|
@@ -12454,14 +11839,13 @@ void quantize_row_iq4_nl(const float * restrict x, void * restrict vy, int k) {
|
|
12454
11839
|
|
12455
11840
|
void quantize_row_iq4_nl_reference(const float * restrict x, block_iq4_nl * restrict y, int k) {
|
12456
11841
|
assert(k % QK4_NL == 0);
|
12457
|
-
quantize_iq4_nl(x, y, 1, k, NULL
|
11842
|
+
quantize_iq4_nl(x, y, 1, k, NULL);
|
12458
11843
|
}
|
12459
11844
|
|
12460
|
-
size_t quantize_iq4_xs(const float * src, void * dst, int nrow, int n_per_row,
|
11845
|
+
size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
12461
11846
|
#if QK_K == 64
|
12462
|
-
return quantize_iq4_nl(src, dst, nrow, n_per_row,
|
11847
|
+
return quantize_iq4_nl(src, dst, nrow, n_per_row, quant_weights);
|
12463
11848
|
#else
|
12464
|
-
(void)hist;
|
12465
11849
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12466
11850
|
int nblock = n_per_row/QK_K;
|
12467
11851
|
char * qrow = (char *)dst;
|
@@ -12490,7 +11874,7 @@ void quantize_row_iq4_xs(const float * restrict x, void * restrict vy, int k) {
|
|
12490
11874
|
|
12491
11875
|
void quantize_row_iq4_xs_reference(const float * restrict x, block_iq4_xs * restrict y, int k) {
|
12492
11876
|
assert(k % QK_K == 0);
|
12493
|
-
quantize_iq4_xs(x, y, 1, k, NULL
|
11877
|
+
quantize_iq4_xs(x, y, 1, k, NULL);
|
12494
11878
|
}
|
12495
11879
|
|
12496
11880
|
// =============================== 2.5625 bpw
|
@@ -12663,8 +12047,7 @@ static void quantize_row_iq2_s_impl(const float * restrict x, void * restrict vy
|
|
12663
12047
|
}
|
12664
12048
|
}
|
12665
12049
|
|
12666
|
-
size_t quantize_iq2_s(const float * src, void * dst, int nrow, int n_per_row,
|
12667
|
-
(void)hist;
|
12050
|
+
size_t quantize_iq2_s(const float * restrict src, void * restrict dst, int nrow, int n_per_row, const float * quant_weights) {
|
12668
12051
|
GGML_ASSERT(n_per_row%QK_K == 0);
|
12669
12052
|
int nblock = n_per_row/QK_K;
|
12670
12053
|
char * qrow = (char *)dst;
|
@@ -12678,7 +12061,7 @@ size_t quantize_iq2_s(const float * src, void * dst, int nrow, int n_per_row, in
|
|
12678
12061
|
|
12679
12062
|
void quantize_row_iq2_s_reference(const float * restrict x, block_iq2_s * restrict y, int k) {
|
12680
12063
|
assert(k % QK_K == 0);
|
12681
|
-
quantize_iq2_s(x, y, 1, k, NULL
|
12064
|
+
quantize_iq2_s(x, y, 1, k, NULL);
|
12682
12065
|
}
|
12683
12066
|
|
12684
12067
|
void quantize_row_iq2_s(const float * restrict x, void * restrict vy, int k) {
|