llama_cpp 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/examples/chat.rb +2 -2
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +23 -11
- data/ext/llama_cpp/src/ggml-alloc.c +13 -50
- data/ext/llama_cpp/src/ggml-cuda.cu +23 -11
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +130 -61
- data/ext/llama_cpp/src/ggml-metal.metal +44 -26
- data/ext/llama_cpp/src/ggml.c +637 -328
- data/ext/llama_cpp/src/ggml.h +45 -19
- data/ext/llama_cpp/src/k_quants.c +2 -2
- data/ext/llama_cpp/src/llama.cpp +426 -97
- data/ext/llama_cpp/src/llama.h +51 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +5 -3
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -123,6 +123,8 @@ typedef void * thread_ret_t;
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
 #define GGML_SILU_FP16
+// #define GGML_CROSS_ENTROPY_EXP_FP16
+// #define GGML_FLASH_ATTN_EXP_FP16
 
 #define GGML_SOFT_MAX_UNROLL 4
 #define GGML_VEC_DOT_UNROLL 2
@@ -157,12 +159,6 @@ typedef void * thread_ret_t;
 //#define GGML_SOFT_MAX_ACCELERATE
 #endif
 
-#if UINTPTR_MAX == 0xFFFFFFFF
-    #define GGML_MEM_ALIGN 4
-#else
-    #define GGML_MEM_ALIGN 16
-#endif
-
 //
 // logging
 //
@@ -192,8 +188,8 @@ typedef void * thread_ret_t;
 //
 
 #if defined(_MSC_VER) || defined(__MINGW32__)
-#define GGML_ALIGNED_MALLOC(size)  _aligned_malloc(size, GGML_MEM_ALIGN)
-#define GGML_ALIGNED_FREE(ptr)     _aligned_free(ptr)
+#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
+#define GGML_ALIGNED_FREE(ptr)    _aligned_free(ptr)
 #else
 inline static void * ggml_aligned_malloc(size_t size) {
     void * aligned_memory = NULL;
@@ -218,8 +214,8 @@ inline static void * ggml_aligned_malloc(size_t size) {
     }
     return aligned_memory;
 }
-#define GGML_ALIGNED_MALLOC(size)  ggml_aligned_malloc(size)
-#define GGML_ALIGNED_FREE(ptr)     free(ptr)
+#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
+#define GGML_ALIGNED_FREE(ptr)    free(ptr)
 #endif
 
 #define UNUSED GGML_UNUSED
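
Note: the hunks above keep the aligned-allocation macros (`_aligned_malloc`/`_aligned_free` on Windows, a `posix_memalign` wrapper elsewhere) while `GGML_MEM_ALIGN` itself leaves ggml.c; in this release it is provided by ggml.h instead. A minimal standalone sketch of the same pattern, with illustrative names (`MEM_ALIGN`, `ALIGNED_MALLOC`) that are not ggml's:

```c
#include <stdlib.h>

#define MEM_ALIGN 16 // stand-in for GGML_MEM_ALIGN, now defined in ggml.h

#if defined(_MSC_VER) || defined(__MINGW32__)
#define ALIGNED_MALLOC(size) _aligned_malloc(size, MEM_ALIGN)
#define ALIGNED_FREE(ptr)    _aligned_free(ptr)
#else
// POSIX path: posix_memalign returns 0 on success and the pointer is
// releasable with plain free(), which is why ALIGNED_FREE maps to free.
static void * aligned_malloc(size_t size) {
    void * mem = NULL;
    if (posix_memalign(&mem, MEM_ALIGN, size) != 0) {
        return NULL;
    }
    return mem;
}
#define ALIGNED_MALLOC(size) aligned_malloc(size)
#define ALIGNED_FREE(ptr)    free(ptr)
#endif
```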
@@ -305,6 +301,10 @@ typedef double ggml_float;
 #endif
 #endif
 
+#ifdef __riscv_v_intrinsic
+#include <riscv_vector.h>
+#endif
+
 #ifdef __F16C__
 
 #ifdef _MSC_VER
@@ -2436,7 +2436,6 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     const int nb = n / qk;
 
     assert(n % qk == 0);
-    assert(nb % 2 == 0);
 
     const block_q4_0 * restrict x = vx;
     const block_q8_0 * restrict y = vy;
@@ -2445,6 +2444,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     float32x4_t sumv0 = vdupq_n_f32(0.0f);
     float32x4_t sumv1 = vdupq_n_f32(0.0f);
 
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q4_0 * restrict x0 = &x[i + 0];
         const block_q4_0 * restrict x1 = &x[i + 1];
@@ -2623,6 +2623,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     }
 
     // Main loop
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 2; i < nb; i+=2) {
         _mm_prefetch(&x[i] + sizeof(block_q4_0), _MM_HINT_T0);
         _mm_prefetch(&y[i] + sizeof(block_q8_0), _MM_HINT_T0);
@@ -2680,6 +2681,41 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     }
 
     *s = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_a = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_l = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vint8m1_t x_ai = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t x_li = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint8m1_t v0 = __riscv_vsub_vx_i8m1(x_ai, 8, vl);
+        vint8m1_t v1 = __riscv_vsub_vx_i8m1(x_li, 8, vl);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += sumi*GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d);
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
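
The new `__riscv_v_intrinsic` branch above vectorizes the q4_0 × q8_0 dot product: unpack the two 4-bit halves of each packed byte, subtract the implicit offset of 8, widening-multiply against the int8 activations, reduce to a scalar, and scale by both block scales. A scalar sketch of the same arithmetic, assuming ggml's q4_0/q8_0 block definitions and `GGML_FP16_TO_FP32` from ggml.c:

```c
// Reference for one call: nb 32-element blocks; in q4_0, 16 packed bytes
// hold 32 weights; element j is the low nibble of qs[j], element j+16 the
// high nibble, both stored with a +8 offset.
static float vec_dot_q4_0_q8_0_ref(int nb, const block_q4_0 * x, const block_q8_0 * y) {
    float sumf = 0.0f;
    for (int i = 0; i < nb; i++) {
        int sumi = 0;
        for (int j = 0; j < 16; j++) {
            const int v0 = (x[i].qs[j] & 0x0F) - 8; // low nibble:  elements 0..15
            const int v1 = (x[i].qs[j] >>   4) - 8; // high nibble: elements 16..31
            sumi += v0*y[i].qs[j] + v1*y[i].qs[j + 16];
        }
        // integer dot product scaled by both per-block fp16 scales
        sumf += sumi * GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d);
    }
    return sumf;
}
```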
@@ -2706,7 +2742,6 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
     const int nb = n / qk;
 
     assert(n % qk == 0);
-    assert(nb % 2 == 0);
 
     const block_q4_1 * restrict x = vx;
     const block_q8_1 * restrict y = vy;
@@ -2718,6 +2753,7 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
 
     float summs = 0;
 
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q4_1 * restrict x0 = &x[i + 0];
         const block_q4_1 * restrict x1 = &x[i + 1];
@@ -2806,6 +2842,38 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
     }
 
     *s = hsum_float_8(acc) + summs;
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_a = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_l = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vint8m1_t v0 = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t v1 = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -2832,7 +2900,6 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
     const int nb = n / qk;
 
     assert(n % qk == 0);
-    assert(nb % 2 == 0);
     assert(qk == QK5_0);
 
     const block_q5_0 * restrict x = vx;
@@ -2848,6 +2915,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
     uint64_t tmp0[4];
     uint64_t tmp1[4];
 
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q5_0 * restrict x0 = &x[i];
         const block_q5_0 * restrict x1 = &x[i + 1];
@@ -3040,6 +3108,76 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
     }
 
     *s = hsum_float_8(acc);
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    uint32_t qh;
+
+    // These temp values are for masking and shift operations
+    uint32_t temp_1[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    uint32_t temp_2[16] = {0x1,   0x2,   0x4,   0x8,   0x10,   0x20,   0x40,   0x80,
+                           0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        memcpy(&qh, x[i].qh, sizeof(uint32_t));
+
+        // temporary registers
+        vuint32m4_t vt_1 = __riscv_vle32_v_u32m4(temp_2, vl);
+        vuint32m4_t vt_2 = __riscv_vle32_v_u32m4(temp_1, vl);
+        vuint32m4_t vt_3 = __riscv_vsll_vx_u32m4(vt_1, 16, vl);
+        vuint32m4_t vt_4 = __riscv_vadd_vx_u32m4(vt_2, 12, vl);
+
+        // ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
+        vuint32m4_t xha_0 = __riscv_vand_vx_u32m4(vt_1, qh, vl);
+        vuint32m4_t xhr_0 = __riscv_vsrl_vv_u32m4(xha_0, vt_2, vl);
+        vuint32m4_t xhl_0 = __riscv_vsll_vx_u32m4(xhr_0, 4, vl);
+
+        // ((qh & (1u << (j + 16))) >> (j + 12));
+        vuint32m4_t xha_1 = __riscv_vand_vx_u32m4(vt_3, qh, vl);
+        vuint32m4_t xhl_1 = __riscv_vsrl_vv_u32m4(xha_1, vt_4, vl);
+
+        // narrowing
+        vuint16m2_t xhc_0 = __riscv_vncvt_x_x_w_u16m2(xhl_0, vl);
+        vuint8m1_t  xh_0  = __riscv_vncvt_x_x_w_u8m1(xhc_0, vl);
+
+        vuint16m2_t xhc_1 = __riscv_vncvt_x_x_w_u16m2(xhl_1, vl);
+        vuint8m1_t  xh_1  = __riscv_vncvt_x_x_w_u8m1(xhc_1, vl);
+
+        // load
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_at = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_lt = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vuint8m1_t x_a = __riscv_vor_vv_u8m1(x_at, xh_0, vl);
+        vuint8m1_t x_l = __riscv_vor_vv_u8m1(x_lt, xh_1, vl);
+
+        vint8m1_t x_ai = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t x_li = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint8m1_t v0 = __riscv_vsub_vx_i8m1(x_ai, 16, vl);
+        vint8m1_t v1 = __riscv_vsub_vx_i8m1(x_li, 16, vl);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi;
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
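
q5_0 stores the fifth bit of each of the 32 block weights packed into the 32-bit `qh` field; the masked shifts and narrowing conversions above rebuild those bits per vector lane, exactly as the inline comments in the hunk describe. A scalar sketch of the bit reconstruction, assuming ggml's `block_q5_0` layout (5-bit values carry a +16 offset):

```c
#include <stdint.h>
#include <string.h>

// Expand one q5_0 block into 32 signed weights. Bit j of qh extends the
// low nibble of qs[j] (element j); bit j+16 extends the high nibble
// (element j+16). This is the scalar form of the xh_0/xh_1 lanes above.
static void unpack_q5_0_block_ref(const block_q5_0 * b, int8_t v[32]) {
    uint32_t qh;
    memcpy(&qh, b->qh, sizeof(uint32_t));
    for (int j = 0; j < 16; j++) {
        const uint8_t xh_0 = (uint8_t)(((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4);
        const uint8_t xh_1 = (uint8_t)( (qh & (1u << (j + 16))) >> (j + 12));
        v[j]      = (int8_t)((((b->qs[j] & 0x0F) | xh_0)) - 16);
        v[j + 16] = (int8_t)((((b->qs[j] >>   4) | xh_1)) - 16);
    }
}
```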
@@ -3072,7 +3210,6 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
     const int nb = n / qk;
 
     assert(n % qk == 0);
-    assert(nb % 2 == 0);
     assert(qk == QK5_1);
 
     const block_q5_1 * restrict x = vx;
@@ -3091,6 +3228,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
     uint64_t tmp0[4];
     uint64_t tmp1[4];
 
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q5_1 * restrict x0 = &x[i];
         const block_q5_1 * restrict x1 = &x[i + 1];
@@ -3296,6 +3434,72 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
     }
 
     *s = hsum_float_8(acc) + summs;
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    uint32_t qh;
+
+    // These temp values are for shift operations
+    uint32_t temp_1[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        memcpy(&qh, x[i].qh, sizeof(uint32_t));
+
+        // temporary registers
+        vuint32m4_t vt_1 = __riscv_vle32_v_u32m4(temp_1, vl);
+        vuint32m4_t vt_2 = __riscv_vadd_vx_u32m4(vt_1, 12, vl);
+
+        // load qh
+        vuint32m4_t vqh = __riscv_vmv_v_x_u32m4(qh, vl);
+
+        // ((qh >> (j + 0)) << 4) & 0x10;
+        vuint32m4_t xhr_0 = __riscv_vsrl_vv_u32m4(vqh, vt_1, vl);
+        vuint32m4_t xhl_0 = __riscv_vsll_vx_u32m4(xhr_0, 4, vl);
+        vuint32m4_t xha_0 = __riscv_vand_vx_u32m4(xhl_0, 0x10, vl);
+
+        // ((qh >> (j + 12)) ) & 0x10;
+        vuint32m4_t xhr_1 = __riscv_vsrl_vv_u32m4(vqh, vt_2, vl);
+        vuint32m4_t xha_1 = __riscv_vand_vx_u32m4(xhr_1, 0x10, vl);
+
+        // narrowing
+        vuint16m2_t xhc_0 = __riscv_vncvt_x_x_w_u16m2(xha_0, vl);
+        vuint8m1_t  xh_0  = __riscv_vncvt_x_x_w_u8m1(xhc_0, vl);
+
+        vuint16m2_t xhc_1 = __riscv_vncvt_x_x_w_u16m2(xha_1, vl);
+        vuint8m1_t  xh_1  = __riscv_vncvt_x_x_w_u8m1(xhc_1, vl);
+
+        // load
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_at = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_lt = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vuint8m1_t x_a = __riscv_vor_vv_u8m1(x_at, xh_0, vl);
+        vuint8m1_t x_l = __riscv_vor_vv_u8m1(x_lt, xh_1, vl);
+
+        vint8m1_t v0 = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t v1 = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -3328,7 +3532,6 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
     const int nb = n / qk;
 
     assert(n % qk == 0);
-    assert(nb % 2 == 0);
 
     const block_q8_0 * restrict x = vx;
     const block_q8_0 * restrict y = vy;
@@ -3337,6 +3540,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
     float32x4_t sumv0 = vdupq_n_f32(0.0f);
     float32x4_t sumv1 = vdupq_n_f32(0.0f);
 
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q8_0 * restrict x0 = &x[i + 0];
         const block_q8_0 * restrict x1 = &x[i + 1];
@@ -3407,6 +3611,26 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
     }
 
     *s = hsum_float_8(acc);
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+    size_t vl = __riscv_vsetvl_e8m1(qk);
+
+    for (int i = 0; i < nb; i++) {
+        // load elements
+        vint8m1_t bx = __riscv_vle8_v_i8m1(x[i].qs, vl);
+        vint8m1_t by = __riscv_vle8_v_i8m1(y[i].qs, vl);
+
+        vint16m2_t vw_mul = __riscv_vwmul_vv_i16m2(bx, by, vl);
+
+        vint32m1_t v_zero = __riscv_vmv_v_x_i32m1(0, vl);
+        vint32m1_t v_sum  = __riscv_vwredsum_vs_i16m2_i32m1(vw_mul, v_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(v_sum);
+
+        sumf += sumi*(GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d));
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -4107,16 +4331,11 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
 }
 
 size_t ggml_nbytes(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    // this should handle cases where the tensor is not contiguous in memory
-    // probably just:
-    //
-    //     return tensor->ne[3]*tensor->nb[3]
-    //
-    // is enough, but just in case, adding the second part
-
-    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type));
+    size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
+        nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
+    }
+    return nbytes;
 }
 
 size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
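
The rewritten `ggml_nbytes` walks the strides instead of assuming `ne[3]*nb[3]` spans the tensor, so it also returns the right answer for non-contiguous views. A worked example under assumed shapes:

```c
// Example: a 4x3 f32 tensor viewed inside a larger buffer with a 32-byte
// row stride, so ne = {4, 3, 1, 1}, nb = {4, 32, 96, 96}, blck_size = 1.
//
//   nbytes = ne[0]*nb[0]/blck_size          = 16
//          + (ne[1] - 1)*nb[1]              = 16 + 2*32 = 80
//          + (ne[2] - 1)*nb[2]              = 80 + 0
//          + (ne[3] - 1)*nb[3]              = 80 + 0
//
// 80 bytes is the span from the first element to one past the last; the
// naive ne[1]*nb[1] = 96 would overcount the padding after the final row.
```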
@@ -4570,36 +4789,51 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         enum   ggml_type      type,
         int    n_dims,
         const int64_t       * ne,
-        void                * data) {
+        struct ggml_tensor  * view_src,
+        size_t                view_offs) {
 
     assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
 
-    size_t data_size = 0;
+    // find the base tensor and absolute offset
+    if (view_src != NULL && view_src->view_src != NULL) {
+        view_offs += view_src->view_offs;
+        view_src   = view_src->view_src;
+    }
 
-    if (data == NULL && !ctx->no_alloc) {
-        data_size += ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
-        for (int i = 1; i < n_dims; i++) {
-            data_size *= ne[i];
-        }
+    size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
+    for (int i = 1; i < n_dims; i++) {
+        data_size *= ne[i];
     }
 
-    if (ctx->scratch.data != NULL && data == NULL) {
-        // allocate tensor data in the scratch buffer
-        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
-            assert(false);
-            return NULL;
-        }
+    GGML_ASSERT(view_src == NULL || data_size + view_offs <= ggml_nbytes(view_src));
 
-        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+    void * data = view_src != NULL ? view_src->data : NULL;
+    if (data != NULL) {
+        data = (char *) data + view_offs;
+    }
 
-        ctx->scratch.offs += data_size;
+    size_t obj_alloc_size = 0;
 
-        data_size = 0;
+    if (view_src == NULL && ctx->no_alloc == false) {
+        if (ctx->scratch.data != NULL) {
+            // allocate tensor data in the scratch buffer
+            if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+                GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                        __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
+                assert(false);
+                return NULL;
+            }
+
+            data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+            ctx->scratch.offs += data_size;
+        } else {
+            // allocate tensor data in the context's memory pool
+            obj_alloc_size = data_size;
+        }
     }
 
-    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
 
 // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
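
Tensors now record which buffer they view (`view_src`) and their byte offset into it (`view_offs`), and the collapsing step above resolves a view of a view straight to the base tensor at creation time. A hedged illustration of what that buys, with a hypothetical one-dimensional tensor `t`:

```c
// Hypothetical view chain over a 100-element f32 tensor t:
struct ggml_tensor * v1 = ggml_view_1d(ctx, t,  50, 40); // 40 bytes into t
struct ggml_tensor * v2 = ggml_view_1d(ctx, v1, 10,  8); //  8 bytes into v1

// After the collapsing step in ggml_new_tensor_impl:
//   v2->view_src  == t   (not v1)
//   v2->view_offs == 48  (40 + 8)
// so consumers never have to walk a chain of views at runtime.
```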
@@ -4619,7 +4853,9 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.perf_runs    =*/ 0,
         /*.perf_cycles  =*/ 0,
         /*.perf_time_us =*/ 0,
-        /*.data         =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data,
+        /*.view_src     =*/ view_src,
+        /*.view_offs    =*/ view_offs,
+        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
         /*.padding      =*/ { 0 },
@@ -4643,28 +4879,12 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }
 
-static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
-    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
-    assert(params_size <= GGML_MAX_OP_PARAMS);
-    memcpy(tensor->op_params, params, params_size);
-}
-
-static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    return ((const int32_t *)(tensor->op_params))[i];
-}
-
-static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    ((int32_t *)(tensor->op_params))[i] = value;
-}
-
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum   ggml_type type,
         int    n_dims,
         const int64_t * ne) {
-    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
+    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
 }
 
 struct ggml_tensor * ggml_new_tensor_1d(
@@ -4729,7 +4949,23 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
 }
 
 struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
-    return ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, NULL);
+    return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
+}
+
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
 }
 
 struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
@@ -5015,14 +5251,13 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *
 
 struct ggml_tensor * ggml_view_tensor(
         struct ggml_context * ctx,
-        const struct ggml_tensor * src) {
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src->data);
+        struct ggml_tensor  * src) {
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
     ggml_format_name(result, "%s (view)", src->name);
 
-    result->nb[0] = src->nb[0];
-    result->nb[1] = src->nb[1];
-    result->nb[2] = src->nb[2];
-    result->nb[3] = src->nb[3];
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        result->nb[i] = src->nb[i];
+    }
 
     return result;
 }
@@ -5595,7 +5830,7 @@ struct ggml_tensor * ggml_repeat_back(
 
 // ggml_concat
 
-struct ggml_tensor* ggml_concat(
+struct ggml_tensor * ggml_concat(
     struct ggml_context* ctx,
     struct ggml_tensor* a,
     struct ggml_tensor* b) {
@@ -5862,7 +6097,8 @@ struct ggml_tensor * ggml_rms_norm_inplace(
 struct ggml_tensor * ggml_rms_norm_back(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
-        struct ggml_tensor  * b) {
+        struct ggml_tensor  * b,
+        float  eps) {
     bool is_node = false;
 
     if (a->grad) {
@@ -5872,6 +6108,8 @@ struct ggml_tensor * ggml_rms_norm_back(
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
+    ggml_set_op_params(result, &eps, sizeof(eps));
+
     result->op   = GGML_OP_RMS_NORM_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
@@ -6201,7 +6439,7 @@ struct ggml_tensor * ggml_reshape(
         //GGML_ASSERT(false);
     }
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6225,7 +6463,7 @@ struct ggml_tensor * ggml_reshape_1d(
     }
 
     const int64_t ne[1] = { ne0 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6250,7 +6488,7 @@ struct ggml_tensor * ggml_reshape_2d(
     }
 
     const int64_t ne[2] = { ne0, ne1 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6276,7 +6514,7 @@ struct ggml_tensor * ggml_reshape_3d(
     }
 
     const int64_t ne[3] = { ne0, ne1, ne2 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6286,7 +6524,6 @@ struct ggml_tensor * ggml_reshape_3d(
     return result;
 }
 
-
 struct ggml_tensor * ggml_reshape_4d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
@@ -6304,7 +6541,7 @@ struct ggml_tensor * ggml_reshape_4d(
     }
 
     const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6314,46 +6551,40 @@ struct ggml_tensor * ggml_reshape_4d(
     return result;
 }
 
-
-
-static struct ggml_tensor * ggml_view_tensor_offset(
+static struct ggml_tensor * ggml_view_impl(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
         int                   n_dims,
         const int64_t       * ne,
         size_t                offset) {
-    // don't calculate an offset from an unallocated tensor
-    void * data = NULL;
-    if (a->data != NULL) {
-        data = (char *) a->data + offset;
-    }
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
+    bool is_node = false;
+
+    if (a->grad) {
+        is_node = true;
+    }
 
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
     ggml_format_name(result, "%s (view)", a->name);
 
     ggml_set_op_params(result, &offset, sizeof(offset));
 
+    result->op   = GGML_OP_VIEW;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+
     return result;
 }
 
+// ggml_view_1d
+
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
        int64_t               ne0,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
-
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
 
     return result;
 }
@@ -6368,24 +6599,14 @@ struct ggml_tensor * ggml_view_2d(
         size_t                nb1,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
+    const int64_t ne[2] = { ne0, ne1 };
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
     result->nb[3] = result->nb[2];
 
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
@@ -6401,24 +6622,14 @@ struct ggml_tensor * ggml_view_3d(
         size_t                nb2,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
+    const int64_t ne[3] = { ne0, ne1, ne2 };
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = result->nb[2]*ne2;
 
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
@@ -6436,24 +6647,14 @@ struct ggml_tensor * ggml_view_4d(
         size_t                nb3,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
+    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = nb3;
 
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
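
All four `ggml_view_Nd` entry points now funnel through the shared `ggml_view_impl`, which creates the view via `ggml_new_tensor_impl(..., a, offset)` and sets op/grad/src once; each wrapper only patches the strides afterwards. A usage sketch, where the parent matrix `m` and its shape are assumed for illustration:

```c
// Take rows 2..5 of a (hypothetical) 10x8 f32 matrix m as a 4x8 view,
// without copying: keep the parent's row stride, skip two rows of bytes.
struct ggml_tensor * rows = ggml_view_2d(ctx, m,
        8, 4,          // ne0 (row length), ne1 (number of rows)
        m->nb[1],      // nb1: reuse the parent's row stride
        2*m->nb[1]);   // offset in bytes: skip the first two rows
```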
@@ -6640,7 +6841,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_DIAG_MASK_INF;
@@ -6657,7 +6858,6 @@ struct ggml_tensor * ggml_diag_mask_inf(
     return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
 }
 
-
 struct ggml_tensor * ggml_diag_mask_inf_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
@@ -6680,7 +6880,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_DIAG_MASK_ZERO;
@@ -7097,11 +7297,13 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
     };
 
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+
+    ggml_set_op_params_i32(result, 0, stride);
+
     result->op = GGML_OP_CONV_TRANSPOSE_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = ggml_new_i32(ctx, stride);
 
     return result;
 }
@@ -9446,6 +9648,8 @@ static void ggml_compute_forward_div_f32(
 
 
 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
@@ -10752,7 +10956,8 @@ static void ggml_compute_forward_rms_norm_back_f32(
 
     GGML_TENSOR_BINARY_OP_LOCALS;
 
-    const float eps = 1e-6f; // TODO: make this a parameter
+    float eps;
+    memcpy(&eps, dst->op_params, sizeof(float));
 
     // TODO: optimize
     for (int64_t i03 = 0; i03 < ne03; i03++) {
@@ -11930,8 +12135,8 @@ static void ggml_compute_forward_diag_mask_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    const int  n_past  =       ((int32_t *) dst->op_params)[0];
-    const bool inplace = (bool)((int32_t *) dst->op_params)[1];
+    const int  n_past  = ((int32_t *) dst->op_params)[0];
+    const bool inplace = src0->data == dst->data;
 
     GGML_ASSERT(n_past >= 0);
 
@@ -12142,6 +12347,7 @@ static void ggml_compute_forward_soft_max_back_f32(
     // dx = J * dy
     // dxk = sum_i(Jki * dyi)
     // dxk = sum_i(-yk*yi * dyi) - (-yk*yk)*dyk + (yk - yk*yk)*dyk
+    // dxk = sum_i(-yk*yi * dyi) + yk*yk*dyk + yk*dyk - yk*yk*dyk
     // dxk = sum_i(-yk*yi * dyi) + yk*dyk
     // dxk = -yk * sum_i(yi * dyi) + yk*dyk
     // dxk = -yk * dot(y, dy) + yk*dyk
@@ -13497,7 +13703,6 @@ static void ggml_compute_forward_conv_transpose_2d(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -13557,7 +13762,7 @@ static void ggml_compute_forward_conv_transpose_2d(
         return;
     }
 
-    const int32_t stride = ((const int32_t*)(opt0->data))[0];
+    const int32_t stride = ggml_get_op_params_i32(dst, 0);
 
     // total patches in dst
     const int np = ne2;
@@ -13570,7 +13775,7 @@ static void ggml_compute_forward_conv_transpose_2d(
     const int ip1 = MIN(ip0 + dp, np);
 
     ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-    ggml_fp16_t * const wdata_src = (ggml_fp16_t *) params->wdata + nk;
+    ggml_fp16_t * const wdata_src = wdata + nk;
 
     for (int i2 = ip0; i2 < ip1; i2++) { // Cout
         float * dst_data = (float *)((char *) dst->data + i2*nb2);
@@ -13582,9 +13787,8 @@ static void ggml_compute_forward_conv_transpose_2d(
                 for (int i00 = 0; i00 < ne00; i00++) {
                     float v = 0;
                     ggml_vec_dot_f16(ne03, &v,
-                            (ggml_fp16_t *)    wdata_src + i1n,
-                            (ggml_fp16_t *) wdata_kernel + i01*ne00*ne03 + i00*ne03);
-
+                            wdata_src + i1n,
+                            wdata_kernel + i01*ne00*ne03 + i00*ne03);
                     dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
                 }
             }
@@ -13934,7 +14138,7 @@ static void ggml_compute_forward_flash_attn_f32(
                 vvexpf(S, S, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };
 
                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -13944,9 +14148,13 @@ static void ggml_compute_forward_flash_attn_f32(
                         if (SS[j] == -INFINITY) {
                             SS[j] = 0.0f;
                         } else {
+#ifndef GGML_FLASH_ATTN_EXP_FP16
+                            const float val = expf(SS[j] - max);
+#else
                             ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
                             memcpy(&scvt[j], &s, sizeof(uint16_t));
                             const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+#endif
                             sump[j] += (ggml_float)val;
                             SS[j] = val;
                         }
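
With `GGML_FLASH_ATTN_EXP_FP16` left undefined (the new default, per the commented-out defines at the top of this diff), flash-attention softmax uses plain `expf`. The older fast path, still selectable, rounds the argument to fp16 and uses its bit pattern to index ggml's precomputed 65536-entry exp table. A sketch of that trick, with names taken from the hunk above:

```c
// fp16 table lookup: round to half precision, reinterpret the 16 bits as
// an index, and fetch a precomputed exp value. Fast, but loses precision
// versus expf, which is why the default now avoids it.
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
uint16_t idx;
memcpy(&idx, &s, sizeof(uint16_t));              // bits of the fp16 value
const float val = GGML_FP16_TO_FP32(table_exp_f16[idx]);

// new default path:
// const float val = expf(SS[j] - max);
```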
@@ -14524,7 +14732,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
             vvexpf(SM, SM, &Mup);
             ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-            uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+            uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
             ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };
 
             for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -14535,9 +14743,13 @@ static void ggml_compute_forward_flash_attn_back_f32(
                     if (SR[j] == -INFINITY) {
                         SW[j] = 0.0f;
                     } else {
+#ifndef GGML_FLASH_ATTN_EXP_FP16
+                        const float val = expf(SR[j] - max);
+#else
                         ggml_fp16_t s = GGML_FP32_TO_FP16(SR[j] - max);
                         memcpy(&scvt[j], &s, sizeof(uint16_t));
                        const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+#endif
                         sump[j] += (ggml_float)val;
                         SW[j] = val;
                     }
@@ -15275,6 +15487,8 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
     const int nc = src0->ne[0];
     const int nr = ggml_nrows(src0);
 
+    GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
+
     if (params->type == GGML_TASK_INIT) {
         if (ith == 0) {
             memset(sums, 0, sizeof(float) * (nth + nth * nc));
@@ -15286,7 +15500,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
         if (ith == 0) {
             float * dp = (float *) dst->data;
             ggml_vec_sum_f32(nth, dp, sums);
-            dp[0] *= -1.0f;
+            dp[0] *= -1.0f / (float) nr;
         }
         return;
     }
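
The `-1.0f / (float) nr` factor changes the reported cross-entropy loss from a sum over rows to a per-row mean:

```c
// Effect of the change above, in formula form:
//
//   loss = -(1/nr) * sum_i sum_k s1[i][k] * log(softmax(s0[i])[k])
//
// so the magnitude no longer grows with the number of rows (batch size);
// the backward kernel later in this diff divides its gradient by nr to match.
```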
@@ -15303,7 +15517,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
     for (int i1 = ir0; i1 < ir1; i1++) {
         float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]);
         float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]);
-        float * st = (float *) params->wdata + nth + ith*nc;
+        float * st = ((float *) params->wdata) + nth + ith*nc;
 
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
@@ -15318,15 +15532,19 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);
 
-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
                 } else {
-                    // const float val = (s0[i] == -INFINITY) ? 0.0 : exp(s0[i] - max);
+#ifndef GGML_CROSS_ENTROPY_EXP_FP16
+                    const float s = s0[i] - max;
+                    const float val = expf(s);
+#else
                     ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
                     memcpy(&scvt, &s, sizeof(scvt));
                     const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+#endif
                     sum += (ggml_float)val;
                     st[i] = val;
                 }
@@ -15342,7 +15560,9 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             ggml_vec_log_f32(nc, st, st);
             ggml_vec_mul_f32(nc, st, st, s1);
 
-            ggml_vec_sum_f32(nc, sums + ith, st);
+            float st_sum = 0;
+            ggml_vec_sum_f32(nc, &st_sum, st);
+            sums[ith] += st_sum;
 
 #ifndef NDEBUG
             for (int i = 0; i < nc; ++i) {
@@ -15392,7 +15612,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }
 
-    const float eps = 1e-9f;
+    const double eps = 1e-9;
 
     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
@@ -15411,7 +15631,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         float * ds0 = (float *)((char *) dst->data + i1*dst->nb[1]);
         float * s0  = (float *)((char *) src0->data + i1*src0->nb[1]);
         float * s1  = (float *)((char *) src1->data + i1*src1->nb[1]);
-        float * sm  = (float *) params->wdata + ith*nc;
 
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
@@ -15420,54 +15639,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             assert(!isnan(s1[i]));
         }
 #endif
-        // step by step explanation:
-        {
-            //float * sums = (float *) params->wdata;
-
-            // forward pass with annotated gradients from backward pass
-            // (built by going in reverse operation order, adding to gradients of current operation args)
-            // st0 = exp(s0-max(s0))                          grad[st0] = grad[st1]*(1.0 - eps)/sum
-            // from softmax_back: grad[s0] = st1_k * (grad[st1]_k - dot(st1, grad[st1]))
-            // ggml_vec_scale_f32(nc, st, sum);           // st1 = st0*/sum = softmax(s0)  grad[st1] = grad[st2]*(1.0 - eps)
-            // ggml_vec_scale_f32(nc, st, (1.0f - eps));  // st2 = st1*(1.0 - eps)         grad[st2] = grad[st3]
-            // ggml_vec_add1_f32(nc, st, st, eps);        // st3 = st2 + eps               grad[st3] = grad[st4]/st3
-            // ggml_vec_log_f32(nc, st, st);              // st4 = log(st3)                grad[st4] = grad[st5] * s1
-            // ggml_vec_mul_f32(nc, st, st, s1);          // st5 = st4 * s1                grad[st5] = grad[sums[ith]]
-            // ggml_vec_sum_f32(nc, sums + ith, st);      // sums[ith] = st5               grad[sums[ith]] = grad[cross_entropy_loss] = -grad[cel]
-
-            // substitute into grad[st1], because we can reuse softmax_back from this point on
-            // grad[st1] = -grad[cel]*s1*(1.0 - eps)/(eps + softmax(s0)*(1.0 - eps))
-            // postorder:
-            // grad[st1] := softmax(s0)
-            // grad[st1] := grad[st1]*(1.0 - eps)
-            // grad[st1] := grad[st1] + eps
-            // grad[st1] := s1 / grad[st1]
-            // grad[st1] := grad[st1]*(1.0-eps)*-grad[cel]
-
-            // src0 gradients by going through softmax_back
-            // grad[s0] = st1_k * (grad[st1]_k - dot(st1, grad[st1]))
-            // from softmax_back:
-            // dxk = yk * (dyk - dot(y, dy))
-            // dot_y_dy := dot(y, dy)
-            // dx := dy
-            // dx := dx - dot_y_dy
-            // dx := dx * y
-            // postorder:
-            // dot_st1_dst1 := dot(st1, grad[st1])
-            // grad[s0] := grad[st1]
-            // grad[s0] := grad[s0] - dot_st1_dst1
-            // grad[s0] := grad[s0] * st1
-
-            // prepend postorder from grad[st1] directly using grad[s0] as memory location, as we will grad[s0] := grad[st1]
-            // sm := softmax(s0)
-            // grad[s0] := sm*(1.0 - eps)
-            // grad[s0] := grad[s0] + eps
-            // grad[s0] := s1 / grad[s0]
-            // grad[s0] := grad[s0]*(1.0-eps)*-grad[cel]
-            // dot_st1_dst1 := dot(sm, grad[s0])
-            // grad[s0] := grad[s0] - dot_st1_dst1
-            // grad[s0] := grad[s0] * sm
-        }
 
         // soft_max
         ggml_float sum = 0.0;
@@ -15475,39 +15646,37 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);
 
-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
-                    sm[i] = 0.0f;
+                    ds0[i] = 0.0f;
                 } else {
-                    // const float val = (s0[i] == -INFINITY) ? 0.0 : exp(s0[i] - max);
+#ifndef GGML_CROSS_ENTROPY_EXP_FP16
+                    const float s = s0[i] - max;
+                    const float val = expf(s);
+#else
                     ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
                     memcpy(&scvt, &s, sizeof(scvt));
                     const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+#endif
                     sum += (ggml_float)val;
-                    sm[i] = val;
+                    ds0[i] = val;
                 }
             }
 
             assert(sum > 0.0);
-            sum = 1.0/sum;
+            sum = (1.0 - eps)/sum;
         }
 
-        // grad(src0) = (softmax(src0) - src1) * grad(cross_entropy_loss(src0, src1))
-        ggml_vec_scale_f32(nc, sm, sum);
-        ggml_vec_cpy_f32  (nc, ds0, sm);
-        ggml_vec_scale_f32(nc, ds0, (1.0f - eps));
-        ggml_vec_add1_f32 (nc, ds0, ds0, eps);
-        ggml_vec_div_f32  (nc, ds0, s1, ds0);
-        ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]);
-        ggml_vec_dot_f32  (nc, &dot_st1_dst1, sm, ds0);
-        ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1);
-        ggml_vec_mul_f32  (nc, ds0, ds0, sm);
+        // grad(src0) = (softmax(src0) - src1) * grad(cross_entropy_loss(src0, src1)) / nr
+        ggml_vec_scale_f32(nc, ds0, sum);
+        ggml_vec_add1_f32(nc, ds0, ds0, eps);
+        ggml_vec_sub_f32(nc, ds0, ds0, s1);
+        ggml_vec_scale_f32(nc, ds0, d[0] / (float) nr);
+
 
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
-            assert(!isnan(sm[i]));
-            assert(!isinf(sm[i]));
             assert(!isnan(ds0[i]));
             assert(!isinf(ds0[i]));
         }
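
The backward pass thus collapses the old multi-step softmax_back chain (and its `sm` workspace) into the closed form noted in the new comment. A short derivation of why that form is correct:

```c
// With p = softmax(s0) and targets t = s1, the (now mean-reduced) loss is
//
//   L = -(1/nr) * sum_k t_k * log(p_k)
//
// Differentiating through the softmax Jacobian gives
//
//   dL/ds0_k = (1/nr) * (p_k * sum_j t_j - t_k)
//
// and for normalized targets (sum_j t_j = 1) this is (p_k - t_k)/nr,
// scaled by the incoming gradient d[0]. The eps terms above only guard
// against log(0) / division blow-ups near zero probabilities.
```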
@@ -15731,7 +15900,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_POOL_1D:
             {
@@ -16062,9 +16231,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
+                    float eps;
+                    memcpy(&eps, tensor->op_params, sizeof(float));
+
                     src0->grad = ggml_add_impl(ctx,
                             src0->grad,
-                            ggml_rms_norm_back(ctx, src0, tensor->grad),
+                            ggml_rms_norm_back(ctx, src0, tensor->grad, eps),
                             inplace);
                 }
             } break;
@@ -16832,9 +17004,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
     return result;
 }
 
-struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
-    struct ggml_cgraph result = *gf;
-
+void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
     GGML_ASSERT(gf->n_nodes > 0);
 
     // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
@@ -16858,15 +17028,19 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
         }
     }
 
-    for (int i = gf->n_nodes - 1; i >= 0; i--) {
+    for (int i = 0; i < gf->n_nodes; i++) {
         struct ggml_tensor * node = gf->nodes[i];
 
         if (node->is_param) {
             GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
-            ggml_build_forward_expand(&result, node->grad);
+            ggml_build_forward_expand(gb, node->grad);
         }
     }
+}
 
+struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
+    struct ggml_cgraph result = *gf;
+    ggml_build_backward_expand(ctx, gf, &result, keep);
     return result;
 }
 
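
Graph construction is split so callers can expand the backward graph into caller-owned storage; `ggml_build_backward` becomes a thin wrapper. A usage sketch (context `ctx` and loss tensor `f` assumed set up elsewhere):

```c
// Build the forward graph, then extend a copy of it with gradient nodes.
struct ggml_cgraph gf = ggml_build_forward(f);
struct ggml_cgraph gb = gf; // start the backward graph as a copy
ggml_build_backward_expand(ctx, &gf, &gb, /*keep=*/true);

// Equivalent one-liner via the wrapper:
// struct ggml_cgraph gb = ggml_build_backward(ctx, &gf, true);
```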
@@ -17542,10 +17716,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                 case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
                     {
                         n_tasks = n_threads;
-
-                        size_t cur = ggml_type_size(node->type)*node->src[0]->ne[0]*n_tasks;
-
-                        work_size = MAX(work_size, cur);
                     } break;
                 case GGML_OP_NONE:
                     {
@@ -18423,14 +18593,16 @@ static enum ggml_opt_result ggml_opt_adam(
         struct ggml_opt_params params,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
-        struct ggml_cgraph * gb) {
+        struct ggml_cgraph * gb,
+        ggml_opt_callback callback,
+        void * callback_data) {
     GGML_ASSERT(ggml_is_scalar(f));
 
     // these will store the parameters we want to optimize
     struct ggml_tensor * ps[GGML_MAX_PARAMS];
 
     int np = 0;
-    int nx = 0;
+    int64_t nx = 0;
     for (int i = 0; i < gf->n_nodes; ++i) {
         if (gf->nodes[i]->is_param) {
            GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
@@ -18449,31 +18621,32 @@ static enum ggml_opt_result ggml_opt_adam(
     }
 
     // constants
-    const float sched = params.adam.sched;
-    const float decay = params.adam.decay * sched;
-    const float alpha = params.adam.alpha * sched;
+    float sched = params.adam.sched;
+    const float alpha = params.adam.alpha;
+    const float decay = params.adam.decay * alpha;
     const float beta1 = params.adam.beta1;
     const float beta2 = params.adam.beta2;
     const float eps   = params.adam.eps;
+    const float gclip = params.adam.gclip;
+    const int decay_min_ndim = params.adam.decay_min_ndim;
 
-    float * x  = opt->adam.x->data;  // view of the parameters
-    float * g1 = opt->adam.g1->data; // gradient
-    float * g2 = opt->adam.g2->data; // gradient squared
     float * m  = opt->adam.m->data;  // first moment
     float * v  = opt->adam.v->data;  // second moment
-    float * mh = opt->adam.mh->data; // first moment hat
-    float * vh = opt->adam.vh->data; // second moment hat
 
     float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
 
-    // update view
-    ggml_opt_get_params(np, ps, x);
+    if (callback) {
+        callback(callback_data, &sched);
+    }
 
     // compute the function value
     ggml_graph_reset  (gf);
     ggml_set_f32      (f->grad, 1.0f);
 
-    ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);
+    struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
+    ggml_graph_compute(gb, &cplan);
 
     opt->adam.fx_prev = ggml_get_f32_1d(f, 0);
     opt->adam.fx_best = opt->adam.fx_prev;
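
Both optimizers now take a `ggml_opt_callback` that is invoked before each evaluation with the caller's data pointer and a pointer to the learning-rate schedule. The typedef lives in ggml.h; from the call sites in this diff its shape is evidently `void (*)(void *, float *)`, and a hypothetical callback matching that usage could look like:

```c
// Illustrative callback: the names and schedule policy here are assumptions,
// only the signature mirrors how callback(callback_data, &sched) is invoked.
static void my_opt_callback(void * data, float * sched) {
    int * step = (int *) data;              // caller-owned iteration counter
    *sched = (*step < 100) ? 0.1f : 1.0f;   // e.g. a simple warmup schedule
    (*step)++;
}
```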
@@ -18481,6 +18654,9 @@ static enum ggml_opt_result ggml_opt_adam(
         pf[opt->iter % params.past] = opt->adam.fx_prev;
     }

+    opt->loss_before = opt->adam.fx_prev;
+    opt->loss_after  = opt->adam.fx_prev;
+
     // initialize
     if (opt->just_initialized) {
         opt->adam.n_no_improvement = 0;
@@ -18513,50 +18689,55 @@ static enum ggml_opt_result ggml_opt_adam(
         UNUSED(t_start_cpu);

         {
-            // [33 removed lines: the previous vectorized Adam update based on g1, g2, mh and vh; their text is not preserved in this diff view]
+            float gnorm = 1.0f;
+            if (gclip > 0.0f) {
+                // gradient clipping
+                ggml_float sum = 0.0;
+                for (int p = 0; p < np; ++p) {
+                    const int64_t ne = ggml_nelements(ps[p]);
+                    for (int64_t j = 0; j < ne; ++j) {
+                        float g = ggml_get_f32_1d(ps[p]->grad, j);
+                        sum += (ggml_float)(g*g);
+                    }
+                }
+                ggml_float norm = sqrt(sum);
+                if (norm > (ggml_float) gclip) {
+                    gnorm = (float) ((ggml_float) gclip / norm);
+                }
+            }
+            const float beta1h = alpha*sched/(1.0f - powf(beta1, opt->iter));
+            const float beta2h =        1.0f/(1.0f - powf(beta2, opt->iter));
+            int64_t i = 0;
+            for (int p = 0; p < np; ++p) {
+                const int64_t ne = ggml_nelements(ps[p]);
+                const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
+                for (int64_t j = 0; j < ne; ++j) {
+                    float x = ggml_get_f32_1d(ps[p], j);
+                    float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
+                    m[i] = m[i]*beta1 +    g*(1.0f - beta1);
+                    v[i] = v[i]*beta2 + g*g*(1.0f - beta2);
+                    float mh = m[i]*beta1h;
+                    float vh = v[i]*beta2h;
+                    vh = sqrtf(vh) + eps;
+                    x  = x*(1.0f - p_decay) - mh/vh;
+                    ggml_set_f32_1d(ps[p], j, x);
+                    ++i;
+                }
+            }
+        }

-            // update the parameters
-            ggml_opt_set_params(np, ps, x);
+        if (callback) {
+            callback(callback_data, &sched);
         }

         ggml_graph_reset  (gf);
         ggml_set_f32      (f->grad, 1.0f);

-        ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);
+        ggml_graph_compute(gb, &cplan);

         const float fx = ggml_get_f32_1d(f, 0);
+        opt->loss_after = fx;
+

         // check convergence
         if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
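Compared with the removed vector-op implementation, the update now runs per parameter scalar: the bias corrections are folded into `beta1h`/`beta2h`, weight decay is applied AdamW-style only to tensors with `n_dims >= decay_min_ndim`, and `gnorm` rescales every gradient when the global L2 norm exceeds `gclip`. A self-contained sketch of one scalar step under those definitions (not the library API):

```c
#include <math.h>

// Sketch of the per-scalar update introduced above. m and v are the Adam
// moments for this scalar; iter counts from 1; p_decay is the per-tensor
// decay already scaled by sched.
static float adamw_step(float x, float g, float * m, float * v,
                        float alpha, float sched, float beta1, float beta2,
                        float eps, float p_decay, int iter) {
    const float beta1h = alpha*sched/(1.0f - powf(beta1, iter)); // lr with bias correction folded in
    const float beta2h =       1.0f/(1.0f - powf(beta2, iter));

    *m = *m*beta1 +   g*(1.0f - beta1);
    *v = *v*beta2 + g*g*(1.0f - beta2);

    const float mh = *m*beta1h;
    const float vh = sqrtf(*v*beta2h) + eps;

    // decoupled weight decay, then the Adam step
    return x*(1.0f - p_decay) - mh/vh;
}
```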
@@ -18625,7 +18806,6 @@ struct ggml_lbfgs_iteration_data {
 };

 static enum ggml_opt_result linesearch_backtracking(
-        struct ggml_context * ctx,
         const struct ggml_opt_params * params,
         int nx,
         float * x,
@@ -18637,8 +18817,11 @@ static enum ggml_opt_result linesearch_backtracking(
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
         struct ggml_cgraph * gb,
+        struct ggml_cplan  * cplan,
         const int np,
-        struct ggml_tensor * ps[]) {
+        struct ggml_tensor * ps[],
+        ggml_opt_callback callback,
+        void * callback_data) {
     int count = 0;

     float width  = 0.0f;
@@ -18667,6 +18850,12 @@ static enum ggml_opt_result linesearch_backtracking(
     dgtest = params->lbfgs.ftol*dginit;

     while (true) {
+        if (callback) {
+            // LBFG-S does not support learning rate -> ignore learning schedule
+            float sched = 0;
+            callback(callback_data, &sched);
+        }
+
         ggml_vec_cpy_f32(nx, x, xp);
         ggml_vec_mad_f32(nx, x, d, *step);

@@ -18677,7 +18866,7 @@ static enum ggml_opt_result linesearch_backtracking(
         ggml_graph_reset  (gf);
         ggml_set_f32      (f->grad, 1.0f);

-        ggml_graph_compute_with_ctx(ctx, gb, params->n_threads);
+        ggml_graph_compute(gb, cplan);

         ggml_opt_get_grad(np, ps, g);

@@ -18737,7 +18926,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         struct ggml_opt_params params,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
-        struct ggml_cgraph * gb) {
+        struct ggml_cgraph * gb,
+        ggml_opt_callback callback,
+        void * callback_data) {
     if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
         params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
         if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
@@ -18769,6 +18960,10 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         opt->iter = iter;
     }

+    struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
+
     float * x  = opt->lbfgs.x->data;  // current parameters
     float * xp = opt->lbfgs.xp->data; // previous parameters
     float * g  = opt->lbfgs.g->data;  // current gradient
@@ -18790,6 +18985,12 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     float * lm_s = opt->lbfgs.lms->data;
     float * lm_y = opt->lbfgs.lmy->data;

+    if (callback) {
+        // LBFG-S does not support learning rate -> ignore learning schedule
+        float sched = 0;
+        callback(callback_data, &sched);
+    }
+
     // evaluate the function value and its gradient
     {
         ggml_opt_set_params(np, ps, x);
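Both optimizers now accept a `ggml_opt_callback`: the Adam path invokes it before every graph evaluation with a mutable `sched`, so callers can implement learning-rate schedules, while the L-BFGS paths pass a dummy `sched = 0`. Assuming the `typedef void (*ggml_opt_callback)(void * data, float * sched);` declared in the ggml.h part of this diff, a warm-up callback could look like this (illustrative, not shipped code):

```c
// Illustrative callback: linear warm-up of the learning-rate schedule.
// Assumes the ggml_opt_callback typedef from the ggml.h side of this diff.
struct my_cb_state { int step; int warmup; };

static void my_opt_callback(void * data, float * sched) {
    struct my_cb_state * st = data;
    st->step++;
    if (st->step < st->warmup) {
        *sched = (float) st->step / st->warmup; // scales alpha and decay this iteration
    } else {
        *sched = 1.0f;
    }
}
```

Passing a pointer to the state struct as `callback_data` when calling `ggml_opt_resume_g` (see the hunks further down) wires it in.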
@@ -18797,11 +18998,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         ggml_graph_reset  (gf);
         ggml_set_f32      (f->grad, 1.0f);

-        ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);
+        ggml_graph_compute(gb, &cplan);

         ggml_opt_get_grad(np, ps, g);

         fx = ggml_get_f32_1d(f, 0);
+
+        opt->loss_before = fx;
+        opt->loss_after  = fx;
     }

     // search direction = -gradient
@@ -18856,7 +19060,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         ggml_vec_cpy_f32(nx, xp, x);
         ggml_vec_cpy_f32(nx, gp, g);

-        ls = linesearch_backtracking(ctx, &params, nx, x, &fx, g, d, step, xp, f, gf, gb, np, ps);
+        ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gf, gb, &cplan, np, ps, callback, callback_data);

         if (ls < 0) {
             // linesearch failed - go back to the previous point and return
@@ -18866,6 +19070,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
             return ls;
         }

+        opt->loss_after = fx;
+
         ggml_vec_norm_f32(nx, &xnorm, x);
         ggml_vec_norm_f32(nx, &gnorm, g);

@@ -18923,7 +19129,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         // ys = y^t \cdot s -> 1 / \rho.
         // yy = y^t \cdot y.
         //
-        ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]
+        ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]);
         ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]);

         lm_ys[end[0]] = ys;
@@ -18986,13 +19192,15 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
                 .adam = {
                     .n_iter = 10000,
                     .sched  = 1.000f,
-                    .decay  = 0.
+                    .decay  = 0.0f,
+                    .decay_min_ndim = 2,
                     .alpha  = 0.001f,
                     .beta1  = 0.9f,
                     .beta2  = 0.999f,
                     .eps    = 1e-8f,
                     .eps_f  = 1e-5f,
                     .eps_g  = 1e-3f,
+                    .gclip  = 0.0f,
                 },
             };
         } break;
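The two new Adam defaults are conservative: `decay_min_ndim = 2` restricts weight decay to matrices and higher-dimensional tensors, and `gclip = 0.0f` leaves gradient clipping off. Opting in is a matter of overriding the defaults (sketch, using only the fields from this diff):

```c
#include "ggml.h"

// Sketch: enable the new clipping and decay fields on top of the defaults.
struct ggml_opt_params params_with_clipping(void) {
    struct ggml_opt_params p = ggml_opt_default_params(GGML_OPT_ADAM);
    p.adam.gclip = 1.0f; // clip the global gradient norm at 1.0
    p.adam.decay = 0.1f; // AdamW decay; applies only when n_dims >= decay_min_ndim
    return p;
}
```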
@@ -19042,23 +19250,13 @@ GGML_API void ggml_opt_init(
     switch (opt->params.type) {
         case GGML_OPT_ADAM:
             {
-                opt->adam.x  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.g1 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.g2 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
                 opt->adam.m  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
                 opt->adam.v  = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.mh = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.vh = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
                 opt->adam.pf = params.past > 0
                     ? ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.past)
                     : NULL;
-                ggml_set_zero(opt->adam.x);
-                ggml_set_zero(opt->adam.g1);
-                ggml_set_zero(opt->adam.g2);
                 ggml_set_zero(opt->adam.m);
                 ggml_set_zero(opt->adam.v);
-                ggml_set_zero(opt->adam.mh);
-                ggml_set_zero(opt->adam.vh);
                 if (opt->adam.pf) {
                     ggml_set_zero(opt->adam.pf);
                 }
@@ -19142,7 +19340,7 @@ enum ggml_opt_result ggml_opt_resume(
     *gf = ggml_build_forward (f);
     *gb = ggml_build_backward(ctx, gf, true);

-    return ggml_opt_resume_g(ctx, opt, f, gf, gb);
+    return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
 }

 enum ggml_opt_result ggml_opt_resume_g(
@@ -19150,7 +19348,9 @@ enum ggml_opt_result ggml_opt_resume_g(
         struct ggml_opt_context * opt,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
-        struct ggml_cgraph * gb) {
+        struct ggml_cgraph * gb,
+        ggml_opt_callback callback,
+        void * callback_data) {

     // build forward + backward compute graphs
     enum ggml_opt_result result = GGML_OPT_OK;
@@ -19158,11 +19358,11 @@ enum ggml_opt_result ggml_opt_resume_g(
     switch (opt->params.type) {
         case GGML_OPT_ADAM:
             {
-                result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb);
+                result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
             } break;
         case GGML_OPT_LBFGS:
             {
-                result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb);
+                result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
             } break;
     }

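`ggml_opt_resume` keeps its old behaviour by forwarding `NULL, NULL`, so only direct callers of `ggml_opt_resume_g` see the new parameters. For example (sketch; `ctx`, `opt`, `f`, `gf`, `gb` as set up by the caller):

```c
// Sketch: passing NULL for the callback pair reproduces the pre-0.5.0 behaviour.
enum ggml_opt_result res = ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
if (res != GGML_OPT_OK) {
    // inspect opt->loss_before / opt->loss_after, also new in this release
}
```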
@@ -19394,7 +19594,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
 ////////////////////////////////////////////////////////////////////////////////

 struct gguf_str {
-    uint32_t n;
+    uint64_t n;  // GGUFv2
     char * data;
 };

@@ -19408,9 +19608,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_FLOAT32] = sizeof(float),
     [GGUF_TYPE_BOOL]    = sizeof(bool),
     [GGUF_TYPE_STRING]  = sizeof(struct gguf_str),
+    [GGUF_TYPE_UINT64]  = sizeof(uint64_t),
+    [GGUF_TYPE_INT64]   = sizeof(int64_t),
+    [GGUF_TYPE_FLOAT64] = sizeof(double),
     [GGUF_TYPE_ARRAY]   = 0, // undefined
 };
-static_assert(GGUF_TYPE_COUNT ==
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

 static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_UINT8]   = "u8",
@@ -19423,8 +19626,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_BOOL]    = "bool",
     [GGUF_TYPE_STRING]  = "str",
     [GGUF_TYPE_ARRAY]   = "arr",
+    [GGUF_TYPE_UINT64]  = "u64",
+    [GGUF_TYPE_INT64]   = "i64",
+    [GGUF_TYPE_FLOAT64] = "f64",
 };
-static_assert(GGUF_TYPE_COUNT ==
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

 union gguf_value {
     uint8_t  uint8;
@@ -19434,6 +19640,9 @@ union gguf_value {
     uint32_t uint32;
     int32_t  int32;
     float    float32;
+    uint64_t uint64;
+    int64_t  int64;
+    double   float64;
     bool     bool_;

     struct gguf_str str;
@@ -19441,7 +19650,7 @@ union gguf_value {
     struct {
         enum gguf_type type;

-        uint32_t n;
+        uint64_t n;  // GGUFv2
         void * data;
     } arr;
 };
@@ -19449,8 +19658,6 @@ union gguf_value {
 struct gguf_kv {
     struct gguf_str key;

-    uint32_t n_bytes; // TODO: is this actually needed?
-
     enum gguf_type type;
     union gguf_value value;
 };
@@ -19458,15 +19665,15 @@ struct gguf_kv {
 struct gguf_header {
     uint32_t magic;
     uint32_t version;
-    uint32_t n_tensors;
-    uint32_t n_kv;
+    uint64_t n_tensors; // GGUFv2
+    uint64_t n_kv;      // GGUFv2
 };

 struct gguf_tensor_info {
     struct gguf_str name;

     uint32_t n_dims;
-    uint32_t ne[GGML_MAX_DIMS];
+    uint64_t ne[GGML_MAX_DIMS];

     enum ggml_type type;

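These struct changes are the whole of the GGUFv2 format bump: counts, string lengths, array lengths, and tensor dimensions widen from 32 to 64 bits, and v1 files stay readable through the compatibility paths in the hunks below. A stand-alone reader for the file prologue would look roughly like this (sketch; error handling reduced to return codes, and the struct is our own since the library's `gguf_header` is internal to ggml.c):

```c
#include <stdint.h>
#include <stdio.h>

// Sketch: parsing the GGUF prologue with the widened v2 fields.
struct gguf_prologue {
    uint32_t magic;      // "GGUF"
    uint32_t version;    // 1 or 2
    uint64_t n_tensors;
    uint64_t n_kv;
};

static int read_gguf_prologue(FILE * f, struct gguf_prologue * h) {
    if (fread(&h->magic,   sizeof(h->magic),   1, f) != 1) return 0;
    if (fread(&h->version, sizeof(h->version), 1, f) != 1) return 0;
    if (h->version == 1) {
        uint32_t n_tensors = 0, n_kv = 0; // GGUFv1 stored 32-bit counts
        if (fread(&n_tensors, sizeof(n_tensors), 1, f) != 1) return 0;
        if (fread(&n_kv,      sizeof(n_kv),      1, f) != 1) return 0;
        h->n_tensors = n_tensors;
        h->n_kv      = n_kv;
    } else { // v2: 64-bit counts follow the version field directly
        if (fread(&h->n_tensors, sizeof(h->n_tensors), 1, f) != 1) return 0;
        if (fread(&h->n_kv,      sizeof(h->n_kv),      1, f) != 1) return 0;
    }
    return 1;
}
```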
@@ -19497,19 +19704,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
     return n == size;
 }

-static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
+// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
     p->n    = 0;
     p->data = NULL;

     bool ok = true;

-    // TODO: how to avoid mallocs for strings?
     ok = ok && gguf_fread_el(file, &p->n,    sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
     ok = ok && gguf_fread_el(file,  p->data, p->n,         offset);

     return ok;
 }

+static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
+    p->n    = 0;
+    p->data = NULL;
+
+    bool ok = true;
+
+    uint32_t n = 0;
+    ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
+    ok = ok && gguf_fread_el(file, p->data, p->n, offset);
+
+    return ok;
+}
+
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));

@@ -19565,8 +19785,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     ctx->data = NULL;

     ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
-    ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
-    ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
+
+    if (ctx->header.version == 1) {
+        // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+        uint32_t n_tensors = 0;
+        uint32_t n_kv      = 0;
+
+        ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
+        ok = ok && gguf_fread_el(file, &n_kv,      sizeof(n_kv),      &offset);
+
+        ctx->header.n_tensors = n_tensors;
+        ctx->header.n_kv      = n_kv;
+    } else {
+        ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
+        ok = ok && gguf_fread_el(file, &ctx->header.n_kv,      sizeof(ctx->header.n_kv),      &offset);
+    }

     if (!ok) {
         fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -19576,18 +19809,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         }
     }

+    // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+    bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
+    if (ctx->header.version == 1) {
+        gguf_fread_str = gguf_fread_str_v1;
+    }
+
     // read the kv pairs
     {
-        ctx->kv =
+        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));

         for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];

             //fprintf(stderr, "%s: reading kv %d\n", __func__, i);

-            ok = ok && gguf_fread_str(file, &kv->key,
-            ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
-            ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
+            ok = ok && gguf_fread_str(file, &kv->key,                    &offset);
+            ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);

             //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);

@@ -19599,12 +19837,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_UINT32:  ok = ok && gguf_fread_el (file, &kv->value.uint32,  sizeof(kv->value.uint32),  &offset); break;
                 case GGUF_TYPE_INT32:   ok = ok && gguf_fread_el (file, &kv->value.int32,   sizeof(kv->value.int32),   &offset); break;
                 case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
+                case GGUF_TYPE_UINT64:  ok = ok && gguf_fread_el (file, &kv->value.uint64,  sizeof(kv->value.uint64),  &offset); break;
+                case GGUF_TYPE_INT64:   ok = ok && gguf_fread_el (file, &kv->value.int64,   sizeof(kv->value.int64),   &offset); break;
+                case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
                 case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (file, &kv->value.bool_,   sizeof(kv->value.bool_),   &offset); break;
                 case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(file, &kv->value.str,                                &offset); break;
                 case GGUF_TYPE_ARRAY:
                     {
                         ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
-                        ok = ok && gguf_fread_el(file, &kv->value.arr.n,    sizeof(kv->value.arr.n),    &offset);
+
+                        if (ctx->header.version == 1) {
+                            // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+                            uint32_t n = 0;
+                            ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
+                            kv->value.arr.n = n;
+                        } else {
+                            ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+                        }

                         switch (kv->value.arr.type) {
                             case GGUF_TYPE_UINT8:
@@ -19614,6 +19863,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                             case GGUF_TYPE_UINT32:
                             case GGUF_TYPE_INT32:
                             case GGUF_TYPE_FLOAT32:
+                            case GGUF_TYPE_UINT64:
+                            case GGUF_TYPE_INT64:
+                            case GGUF_TYPE_FLOAT64:
                             case GGUF_TYPE_BOOL:
                                 {
                                     kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -19648,7 +19900,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the tensor infos
     {
-        ctx->infos =
+        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));

         for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
@@ -19660,7 +19912,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             ok = ok && gguf_fread_str(file, &info->name,                         &offset);
             ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
             for (uint32_t j = 0; j < info->n_dims; ++j) {
-                ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+                if (ctx->header.version == 1) {
+                    // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+                    uint32_t t = 0;
+                    ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
+                    info->ne[j] = t;
+                } else {
+                    ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+                }
             }
             ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),   &offset);
             ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
@@ -19842,7 +20101,7 @@ void gguf_free(struct gguf_context * ctx) {
             }
         }

-        GGML_ALIGNED_FREE(ctx->kv);
+        free(ctx->kv);
     }

     if (ctx->infos) {
@@ -19854,7 +20113,7 @@ void gguf_free(struct gguf_context * ctx) {
             }
         }

-        GGML_ALIGNED_FREE(ctx->infos);
+        free(ctx->infos);
     }

     GGML_ALIGNED_FREE(ctx);
@@ -19954,6 +20213,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
     return ctx->kv[i].value.float32;
 }

+uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
+    return ctx->kv[i].value.uint64;
+}
+
+int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
+    return ctx->kv[i].value.int64;
+}
+
+double gguf_get_val_f64(struct gguf_context * ctx, int i) {
+    return ctx->kv[i].value.float64;
+}
+
 bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
     return ctx->kv[i].value.bool_;
 }
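The new getters mirror the existing scalar accessors: find the key, check its declared type, then read the matching union member. Hypothetical usage, assuming the existing `gguf_find_key`/`gguf_get_kv_type` helpers from the same API (the key name is made up):

```c
#include <stdint.h>
#include <stdio.h>
#include "ggml.h"

// Sketch: reading a 64-bit KV pair. "my.file_size" is a made-up key and
// `gctx` an already-opened gguf_context.
void print_file_size(struct gguf_context * gctx) {
    const int key_id = gguf_find_key(gctx, "my.file_size");
    if (key_id >= 0 && gguf_get_kv_type(gctx, key_id) == GGUF_TYPE_UINT64) {
        const uint64_t n = gguf_get_val_u64(gctx, key_id);
        printf("file size: %llu bytes\n", (unsigned long long) n);
    }
}
```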
@@ -20000,7 +20271,7 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
     const int n_kv = gguf_get_n_kv(ctx);

     ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
-    ctx->kv[n_kv].key.n    = strlen(key)
+    ctx->kv[n_kv].key.n    = strlen(key);
     ctx->kv[n_kv].key.data = strdup(key);
     ctx->header.n_kv++;

@@ -20056,6 +20327,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
     ctx->kv[idx].value.float32 = val;
 }

+void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type         = GGUF_TYPE_UINT64;
+    ctx->kv[idx].value.uint64 = val;
+}
+
+void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type        = GGUF_TYPE_INT64;
+    ctx->kv[idx].value.int64 = val;
+}
+
+void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type          = GGUF_TYPE_FLOAT64;
+    ctx->kv[idx].value.float64 = val;
+}
+
 void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
     const int idx = gguf_get_or_add_key(ctx, key);

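The setters are symmetric: each stores its value in the matching union member after `gguf_get_or_add_key`. A write-side sketch using the public API (key names and the output path are illustrative):

```c
#include "ggml.h"

// Sketch: writing 64-bit metadata with the new setters.
void write_meta_example(void) {
    struct gguf_context * gctx = gguf_init_empty();

    gguf_set_val_u64(gctx, "example.token_count", 1234567890123ULL);
    gguf_set_val_i64(gctx, "example.offset",      -42);
    gguf_set_val_f64(gctx, "example.scale",       1.0e-9);

    gguf_write_to_file(gctx, "meta-only.gguf", /*only_meta=*/ true);
    gguf_free(gctx);
}
```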
@@ -20067,7 +20359,7 @@ void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char *
     const int idx = gguf_get_or_add_key(ctx, key);

     ctx->kv[idx].type           = GGUF_TYPE_STRING;
-    ctx->kv[idx].value.str.n    = strlen(val)
+    ctx->kv[idx].value.str.n    = strlen(val);
     ctx->kv[idx].value.str.data = strdup(val);
 }

@@ -20090,7 +20382,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
     ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
     for (int i = 0; i < n; i++) {
         struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
-        str->n    = strlen(data[i])
+        str->n    = strlen(data[i]);
         str->data = strdup(data[i]);
     }
 }
@@ -20106,6 +20398,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
             case GGUF_TYPE_UINT32:  gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32);   break;
             case GGUF_TYPE_INT32:   gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32);    break;
             case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32);  break;
+            case GGUF_TYPE_UINT64:  gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64);   break;
+            case GGUF_TYPE_INT64:   gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64);    break;
+            case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64);  break;
             case GGUF_TYPE_BOOL:    gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_);    break;
             case GGUF_TYPE_STRING:  gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
             case GGUF_TYPE_ARRAY:
@@ -20134,7 +20429,7 @@ void gguf_add_tensor(
     const int idx = ctx->header.n_tensors;
     ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));

-    ctx->infos[idx].name.n    = strlen(tensor->name)
+    ctx->infos[idx].name.n    = strlen(tensor->name);
     ctx->infos[idx].name.data = strdup(tensor->name);

     for (int i = 0; i < GGML_MAX_DIMS; ++i) {
@@ -20267,6 +20562,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
             case GGUF_TYPE_UINT32:  gguf_bwrite_el (buf, &kv->value.uint32,  sizeof(kv->value.uint32) ); break;
             case GGUF_TYPE_INT32:   gguf_bwrite_el (buf, &kv->value.int32,   sizeof(kv->value.int32)  ); break;
             case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
+            case GGUF_TYPE_UINT64:  gguf_bwrite_el (buf, &kv->value.uint64,  sizeof(kv->value.uint64) ); break;
+            case GGUF_TYPE_INT64:   gguf_bwrite_el (buf, &kv->value.int64,   sizeof(kv->value.int64)  ); break;
+            case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
             case GGUF_TYPE_BOOL:    gguf_bwrite_el (buf, &kv->value.bool_,   sizeof(kv->value.bool_)  ); break;
             case GGUF_TYPE_STRING:  gguf_bwrite_str(buf, &kv->value.str                               ); break;
             case GGUF_TYPE_ARRAY:
@@ -20282,6 +20580,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
                         case GGUF_TYPE_UINT32:
                         case GGUF_TYPE_INT32:
                         case GGUF_TYPE_FLOAT32:
+                        case GGUF_TYPE_UINT64:
+                        case GGUF_TYPE_INT64:
+                        case GGUF_TYPE_FLOAT64:
                         case GGUF_TYPE_BOOL:
                             {
                                 gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -20516,6 +20817,14 @@ int ggml_cpu_has_sse3(void) {
 #endif
 }

+int ggml_cpu_has_ssse3(void) {
+#if defined(__SSSE3__)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_vsx(void) {
 #if defined(__POWER9_VECTOR__)
     return 1;