llama_cpp 0.4.0 → 0.5.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/examples/chat.rb +2 -2
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +23 -11
- data/ext/llama_cpp/src/ggml-alloc.c +13 -50
- data/ext/llama_cpp/src/ggml-cuda.cu +23 -11
- data/ext/llama_cpp/src/ggml-metal.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +130 -61
- data/ext/llama_cpp/src/ggml-metal.metal +44 -26
- data/ext/llama_cpp/src/ggml.c +637 -328
- data/ext/llama_cpp/src/ggml.h +45 -19
- data/ext/llama_cpp/src/k_quants.c +2 -2
- data/ext/llama_cpp/src/llama.cpp +426 -97
- data/ext/llama_cpp/src/llama.h +51 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +5 -3
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -123,6 +123,8 @@ typedef void * thread_ret_t;
 #define GGML_GELU_FP16
 #define GGML_GELU_QUICK_FP16
 #define GGML_SILU_FP16
+// #define GGML_CROSS_ENTROPY_EXP_FP16
+// #define GGML_FLASH_ATTN_EXP_FP16

 #define GGML_SOFT_MAX_UNROLL 4
 #define GGML_VEC_DOT_UNROLL 2
@@ -157,12 +159,6 @@ typedef void * thread_ret_t;
 //#define GGML_SOFT_MAX_ACCELERATE
 #endif

-#if UINTPTR_MAX == 0xFFFFFFFF
-    #define GGML_MEM_ALIGN 4
-#else
-    #define GGML_MEM_ALIGN 16
-#endif
-
 //
 // logging
 //
@@ -192,8 +188,8 @@ typedef void * thread_ret_t;
 //

 #if defined(_MSC_VER) || defined(__MINGW32__)
-#define GGML_ALIGNED_MALLOC(size)
-#define GGML_ALIGNED_FREE(ptr)
+#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
+#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
 #else
 inline static void * ggml_aligned_malloc(size_t size) {
     void * aligned_memory = NULL;
@@ -218,8 +214,8 @@ inline static void * ggml_aligned_malloc(size_t size) {
     }
     return aligned_memory;
 }
-#define GGML_ALIGNED_MALLOC(size)
-#define GGML_ALIGNED_FREE(ptr)
+#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
+#define GGML_ALIGNED_FREE(ptr) free(ptr)
 #endif

 #define UNUSED GGML_UNUSED
@@ -305,6 +301,10 @@ typedef double ggml_float;
 #endif
 #endif

+#ifdef __riscv_v_intrinsic
+#include <riscv_vector.h>
+#endif
+
 #ifdef __F16C__

 #ifdef _MSC_VER
@@ -2436,7 +2436,6 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     const int nb = n / qk;

     assert(n % qk == 0);
-    assert(nb % 2 == 0);

     const block_q4_0 * restrict x = vx;
     const block_q8_0 * restrict y = vy;
@@ -2445,6 +2444,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     float32x4_t sumv0 = vdupq_n_f32(0.0f);
     float32x4_t sumv1 = vdupq_n_f32(0.0f);

+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q4_0 * restrict x0 = &x[i + 0];
         const block_q4_0 * restrict x1 = &x[i + 1];
@@ -2623,6 +2623,7 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     }

     // Main loop
+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 2; i < nb; i+=2) {
         _mm_prefetch(&x[i] + sizeof(block_q4_0), _MM_HINT_T0);
         _mm_prefetch(&y[i] + sizeof(block_q8_0), _MM_HINT_T0);
@@ -2680,6 +2681,41 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
     }

     *s = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_a = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_l = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vint8m1_t x_ai = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t x_li = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint8m1_t v0 = __riscv_vsub_vx_i8m1(x_ai, 8, vl);
+        vint8m1_t v1 = __riscv_vsub_vx_i8m1(x_li, 8, vl);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += sumi*GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d);
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
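Note (not part of the upstream diff): the new __riscv_v_intrinsic branch above vectorizes the same per-block computation as the scalar fallback that follows it. A rough, illustrative scalar sketch of one block's contribution, assuming the usual q4_0/q8_0 layout (32 weights per block, element j in the low nibble of qs[j] and element j+16 in the high nibble, with an offset of 8 on the 4-bit values); the helper name is hypothetical:

    // illustrative sketch only; block_q4_0/block_q8_0 and GGML_FP16_TO_FP32 as in ggml.c
    static inline float q4_0_q8_0_block_dot(const block_q4_0 * restrict x, const block_q8_0 * restrict y) {
        int sumi = 0;
        for (int j = 0; j < 16; ++j) {
            const int v0 = (x->qs[j] & 0x0F) - 8; // low nibble  -> element j
            const int v1 = (x->qs[j] >>   4) - 8; // high nibble -> element j + 16
            sumi += v0*y->qs[j] + v1*y->qs[j + 16];
        }
        return sumi * GGML_FP16_TO_FP32(x->d) * GGML_FP16_TO_FP32(y->d);
    }

The vector path loads the 16 packed bytes once, splits them into the two nibble lanes, widens the products to 16 bits, and reduces with __riscv_vwredsum, accumulating the same per-block sum.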
@@ -2706,7 +2742,6 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
     const int nb = n / qk;

     assert(n % qk == 0);
-    assert(nb % 2 == 0);

     const block_q4_1 * restrict x = vx;
     const block_q8_1 * restrict y = vy;
@@ -2718,6 +2753,7 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *

     float summs = 0;

+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q4_1 * restrict x0 = &x[i + 0];
         const block_q4_1 * restrict x1 = &x[i + 1];
@@ -2806,6 +2842,38 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
     }

     *s = hsum_float_8(acc) + summs;
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_a = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_l = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vint8m1_t v0 = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t v1 = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -2832,7 +2900,6 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
     const int nb = n / qk;

     assert(n % qk == 0);
-    assert(nb % 2 == 0);
     assert(qk == QK5_0);

     const block_q5_0 * restrict x = vx;
@@ -2848,6 +2915,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
     uint64_t tmp0[4];
     uint64_t tmp1[4];

+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q5_0 * restrict x0 = &x[i];
         const block_q5_0 * restrict x1 = &x[i + 1];
@@ -3040,6 +3108,76 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
     }

     *s = hsum_float_8(acc);
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    uint32_t qh;
+
+    // These temp values are for masking and shift operations
+    uint32_t temp_1[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    uint32_t temp_2[16] = {0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
+                           0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000};
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        memcpy(&qh, x[i].qh, sizeof(uint32_t));
+
+        // temporary registers
+        vuint32m4_t vt_1 = __riscv_vle32_v_u32m4(temp_2, vl);
+        vuint32m4_t vt_2 = __riscv_vle32_v_u32m4(temp_1, vl);
+        vuint32m4_t vt_3 = __riscv_vsll_vx_u32m4(vt_1, 16, vl);
+        vuint32m4_t vt_4 = __riscv_vadd_vx_u32m4(vt_2, 12, vl);
+
+        // ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
+        vuint32m4_t xha_0 = __riscv_vand_vx_u32m4(vt_1, qh, vl);
+        vuint32m4_t xhr_0 = __riscv_vsrl_vv_u32m4(xha_0, vt_2, vl);
+        vuint32m4_t xhl_0 = __riscv_vsll_vx_u32m4(xhr_0, 4, vl);
+
+        // ((qh & (1u << (j + 16))) >> (j + 12));
+        vuint32m4_t xha_1 = __riscv_vand_vx_u32m4(vt_3, qh, vl);
+        vuint32m4_t xhl_1 = __riscv_vsrl_vv_u32m4(xha_1, vt_4, vl);
+
+        // narrowing
+        vuint16m2_t xhc_0 = __riscv_vncvt_x_x_w_u16m2(xhl_0, vl);
+        vuint8m1_t xh_0 = __riscv_vncvt_x_x_w_u8m1(xhc_0, vl);
+
+        vuint16m2_t xhc_1 = __riscv_vncvt_x_x_w_u16m2(xhl_1, vl);
+        vuint8m1_t xh_1 = __riscv_vncvt_x_x_w_u8m1(xhc_1, vl);
+
+        // load
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_at = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_lt = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vuint8m1_t x_a = __riscv_vor_vv_u8m1(x_at, xh_0, vl);
+        vuint8m1_t x_l = __riscv_vor_vv_u8m1(x_lt, xh_1, vl);
+
+        vint8m1_t x_ai = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t x_li = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint8m1_t v0 = __riscv_vsub_vx_i8m1(x_ai, 16, vl);
+        vint8m1_t v1 = __riscv_vsub_vx_i8m1(x_li, 16, vl);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi;
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -3072,7 +3210,6 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
     const int nb = n / qk;

     assert(n % qk == 0);
-    assert(nb % 2 == 0);
     assert(qk == QK5_1);

     const block_q5_1 * restrict x = vx;
@@ -3091,6 +3228,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
     uint64_t tmp0[4];
     uint64_t tmp1[4];

+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q5_1 * restrict x0 = &x[i];
         const block_q5_1 * restrict x1 = &x[i + 1];
@@ -3296,6 +3434,72 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
     }

     *s = hsum_float_8(acc) + summs;
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+
+    uint32_t qh;
+
+    // These temp values are for shift operations
+    uint32_t temp_1[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+    size_t vl = __riscv_vsetvl_e8m1(qk/2);
+
+    for (int i = 0; i < nb; i++) {
+        memcpy(&qh, x[i].qh, sizeof(uint32_t));
+
+        // temporary registers
+        vuint32m4_t vt_1 = __riscv_vle32_v_u32m4(temp_1, vl);
+        vuint32m4_t vt_2 = __riscv_vadd_vx_u32m4(vt_1, 12, vl);
+
+        // load qh
+        vuint32m4_t vqh = __riscv_vmv_v_x_u32m4(qh, vl);
+
+        // ((qh >> (j + 0)) << 4) & 0x10;
+        vuint32m4_t xhr_0 = __riscv_vsrl_vv_u32m4(vqh, vt_1, vl);
+        vuint32m4_t xhl_0 = __riscv_vsll_vx_u32m4(xhr_0, 4, vl);
+        vuint32m4_t xha_0 = __riscv_vand_vx_u32m4(xhl_0, 0x10, vl);
+
+        // ((qh >> (j + 12)) ) & 0x10;
+        vuint32m4_t xhr_1 = __riscv_vsrl_vv_u32m4(vqh, vt_2, vl);
+        vuint32m4_t xha_1 = __riscv_vand_vx_u32m4(xhr_1, 0x10, vl);
+
+        // narrowing
+        vuint16m2_t xhc_0 = __riscv_vncvt_x_x_w_u16m2(xha_0, vl);
+        vuint8m1_t xh_0 = __riscv_vncvt_x_x_w_u8m1(xhc_0, vl);
+
+        vuint16m2_t xhc_1 = __riscv_vncvt_x_x_w_u16m2(xha_1, vl);
+        vuint8m1_t xh_1 = __riscv_vncvt_x_x_w_u8m1(xhc_1, vl);
+
+        // load
+        vuint8m1_t tx = __riscv_vle8_v_u8m1(x[i].qs, vl);
+
+        vint8m1_t y0 = __riscv_vle8_v_i8m1(y[i].qs, vl);
+        vint8m1_t y1 = __riscv_vle8_v_i8m1(y[i].qs+16, vl);
+
+        vuint8m1_t x_at = __riscv_vand_vx_u8m1(tx, 0x0F, vl);
+        vuint8m1_t x_lt = __riscv_vsrl_vx_u8m1(tx, 0x04, vl);
+
+        vuint8m1_t x_a = __riscv_vor_vv_u8m1(x_at, xh_0, vl);
+        vuint8m1_t x_l = __riscv_vor_vv_u8m1(x_lt, xh_1, vl);
+
+        vint8m1_t v0 = __riscv_vreinterpret_v_u8m1_i8m1(x_a);
+        vint8m1_t v1 = __riscv_vreinterpret_v_u8m1_i8m1(x_l);
+
+        vint16m2_t vec_mul1 = __riscv_vwmul_vv_i16m2(v0, y0, vl);
+        vint16m2_t vec_mul2 = __riscv_vwmul_vv_i16m2(v1, y1, vl);
+
+        vint32m1_t vec_zero = __riscv_vmv_v_x_i32m1(0, vl);
+
+        vint32m1_t vs1 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul1, vec_zero, vl);
+        vint32m1_t vs2 = __riscv_vwredsum_vs_i16m2_i32m1(vec_mul2, vec_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(vs1);
+        sumi += __riscv_vmv_x_s_i32m1_i32(vs2);
+
+        sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -3328,7 +3532,6 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
     const int nb = n / qk;

     assert(n % qk == 0);
-    assert(nb % 2 == 0);

     const block_q8_0 * restrict x = vx;
     const block_q8_0 * restrict y = vy;
@@ -3337,6 +3540,7 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
     float32x4_t sumv0 = vdupq_n_f32(0.0f);
     float32x4_t sumv1 = vdupq_n_f32(0.0f);

+    GGML_ASSERT(nb % 2 == 0); // TODO: handle odd nb
     for (int i = 0; i < nb; i += 2) {
         const block_q8_0 * restrict x0 = &x[i + 0];
         const block_q8_0 * restrict x1 = &x[i + 1];
@@ -3407,6 +3611,26 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
     }

     *s = hsum_float_8(acc);
+#elif defined(__riscv_v_intrinsic)
+    float sumf = 0.0;
+    size_t vl = __riscv_vsetvl_e8m1(qk);
+
+    for (int i = 0; i < nb; i++) {
+        // load elements
+        vint8m1_t bx = __riscv_vle8_v_i8m1(x[i].qs, vl);
+        vint8m1_t by = __riscv_vle8_v_i8m1(y[i].qs, vl);
+
+        vint16m2_t vw_mul = __riscv_vwmul_vv_i16m2(bx, by, vl);
+
+        vint32m1_t v_zero = __riscv_vmv_v_x_i32m1(0, vl);
+        vint32m1_t v_sum = __riscv_vwredsum_vs_i16m2_i32m1(vw_mul, v_zero, vl);
+
+        int sumi = __riscv_vmv_x_s_i32m1_i32(v_sum);
+
+        sumf += sumi*(GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d));
+    }
+
+    *s = sumf;
 #else
     // scalar
     float sumf = 0.0;
@@ -4107,16 +4331,11 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
 }

 size_t ggml_nbytes(const struct ggml_tensor * tensor) {
-
-
-
-
-
-    // return tensor->ne[3]*tensor->nb[3]
-    //
-    // is enough, but just in case, adding the second part
-
-    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type));
+    size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
+        nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
+    }
+    return nbytes;
 }

 size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
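For context (not part of the diff): the rewritten ggml_nbytes no longer assumes the tensor is contiguous. It takes the byte size of one row, ne[0]*nb[0]/blck_size, and then adds the offset of the last element along each remaining dimension, (ne[i] - 1)*nb[i]. For example, a contiguous F32 tensor with ne = {3, 2, 1, 1} and strides nb = {4, 12, 24, 24} gives 3*4 + (2-1)*12 + 0 + 0 = 24 bytes, the same as the old ne[3]*nb[3] formula, but the new form also yields a sensible size for views with non-standard strides.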
@@ -4570,36 +4789,51 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         enum ggml_type type,
         int n_dims,
         const int64_t * ne,
-
+        struct ggml_tensor * view_src,
+        size_t view_offs) {

     assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);

-
+    // find the base tensor and absolute offset
+    if (view_src != NULL && view_src->view_src != NULL) {
+        view_offs += view_src->view_offs;
+        view_src = view_src->view_src;
+    }

-
-
-
-        data_size *= ne[i];
-    }
+    size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
+    for (int i = 1; i < n_dims; i++) {
+        data_size *= ne[i];
     }

-
-    // allocate tensor data in the scratch buffer
-    if (ctx->scratch.offs + data_size > ctx->scratch.size) {
-        GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
-        assert(false);
-        return NULL;
-    }
+    GGML_ASSERT(view_src == NULL || data_size + view_offs <= ggml_nbytes(view_src));

-
+    void * data = view_src != NULL ? view_src->data : NULL;
+    if (data != NULL) {
+        data = (char *) data + view_offs;
+    }

-
+    size_t obj_alloc_size = 0;

-
+    if (view_src == NULL && ctx->no_alloc == false) {
+        if (ctx->scratch.data != NULL) {
+            // allocate tensor data in the scratch buffer
+            if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+                GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                        __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
+                assert(false);
+                return NULL;
+            }
+
+            data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+            ctx->scratch.offs += data_size;
+        } else {
+            // allocate tensor data in the context's memory pool
+            obj_alloc_size = data_size;
+        }
     }

-    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE +
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);

     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here

@@ -4619,7 +4853,9 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.perf_runs =*/ 0,
         /*.perf_cycles =*/ 0,
         /*.perf_time_us =*/ 0,
-        /*.
+        /*.view_src =*/ view_src,
+        /*.view_offs =*/ view_offs,
+        /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name =*/ { 0 },
         /*.extra =*/ NULL,
         /*.padding =*/ { 0 },
@@ -4643,28 +4879,12 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }

-static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
-    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
-    assert(params_size <= GGML_MAX_OP_PARAMS);
-    memcpy(tensor->op_params, params, params_size);
-}
-
-static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    return ((const int32_t *)(tensor->op_params))[i];
-}
-
-static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    ((int32_t *)(tensor->op_params))[i] = value;
-}
-
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
         int n_dims,
         const int64_t * ne) {
-    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
+    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
 }

 struct ggml_tensor * ggml_new_tensor_1d(
@@ -4729,7 +4949,23 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
 }

 struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
-    return
+    return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
+}
+
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
 }

 struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
@@ -5015,14 +5251,13 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *

 struct ggml_tensor * ggml_view_tensor(
         struct ggml_context * ctx,
-
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src
+        struct ggml_tensor * src) {
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
     ggml_format_name(result, "%s (view)", src->name);

-
-
-
-    result->nb[3] = src->nb[3];
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        result->nb[i] = src->nb[i];
+    }

     return result;
 }
@@ -5595,7 +5830,7 @@ struct ggml_tensor * ggml_repeat_back(

 // ggml_concat

-struct ggml_tensor* ggml_concat(
+struct ggml_tensor * ggml_concat(
     struct ggml_context* ctx,
     struct ggml_tensor* a,
     struct ggml_tensor* b) {
@@ -5862,7 +6097,8 @@ struct ggml_tensor * ggml_rms_norm_inplace(
 struct ggml_tensor * ggml_rms_norm_back(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        struct ggml_tensor * b
+        struct ggml_tensor * b,
+        float eps) {
     bool is_node = false;

     if (a->grad) {
@@ -5872,6 +6108,8 @@ struct ggml_tensor * ggml_rms_norm_back(

     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);

+    ggml_set_op_params(result, &eps, sizeof(eps));
+
     result->op = GGML_OP_RMS_NORM_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
@@ -6201,7 +6439,7 @@ struct ggml_tensor * ggml_reshape(
         //GGML_ASSERT(false);
     }

-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);

     result->op = GGML_OP_RESHAPE;
@@ -6225,7 +6463,7 @@ struct ggml_tensor * ggml_reshape_1d(
     }

     const int64_t ne[1] = { ne0 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);

     result->op = GGML_OP_RESHAPE;
@@ -6250,7 +6488,7 @@ struct ggml_tensor * ggml_reshape_2d(
     }

     const int64_t ne[2] = { ne0, ne1 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);

     result->op = GGML_OP_RESHAPE;
@@ -6276,7 +6514,7 @@ struct ggml_tensor * ggml_reshape_3d(
     }

     const int64_t ne[3] = { ne0, ne1, ne2 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);

     result->op = GGML_OP_RESHAPE;
@@ -6286,7 +6524,6 @@ struct ggml_tensor * ggml_reshape_3d(
     return result;
 }

-
 struct ggml_tensor * ggml_reshape_4d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6304,7 +6541,7 @@ struct ggml_tensor * ggml_reshape_4d(
     }

     const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);

     result->op = GGML_OP_RESHAPE;
@@ -6314,46 +6551,40 @@ struct ggml_tensor * ggml_reshape_4d(
     return result;
 }

-
-
-static struct ggml_tensor * ggml_view_tensor_offset(
+static struct ggml_tensor * ggml_view_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_dims,
         const int64_t * ne,
         size_t offset) {
-    // don't calculate an offset from an unallocated tensor
-    void * data = NULL;
-    if (a->data != NULL) {
-        data = (char *) a->data + offset;
-    }

-
+    bool is_node = false;
+
+    if (a->grad) {
+        is_node = true;
+    }

+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
     ggml_format_name(result, "%s (view)", a->name);

     ggml_set_op_params(result, &offset, sizeof(offset));

+    result->op = GGML_OP_VIEW;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;
+
     return result;
 }

+// ggml_view_1d
+
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int64_t ne0,
         size_t offset) {

-
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
-
-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);

     return result;
 }
@@ -6368,24 +6599,14 @@ struct ggml_tensor * ggml_view_2d(
         size_t nb1,
         size_t offset) {

-
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
+    const int64_t ne[2] = { ne0, ne1 };

-    struct ggml_tensor * result =
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);

     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
     result->nb[3] = result->nb[2];

-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }

@@ -6401,24 +6622,14 @@ struct ggml_tensor * ggml_view_3d(
         size_t nb2,
         size_t offset) {

-
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
+    const int64_t ne[3] = { ne0, ne1, ne2 };

-    struct ggml_tensor * result =
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);

     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = result->nb[2]*ne2;

-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }

@@ -6436,24 +6647,14 @@ struct ggml_tensor * ggml_view_4d(
         size_t nb3,
         size_t offset) {

-
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
+    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };

-    struct ggml_tensor * result =
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);

     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = nb3;

-    result->op = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }

@@ -6640,7 +6841,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(

     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-    int32_t params[] = { n_past
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_DIAG_MASK_INF;
@@ -6657,7 +6858,6 @@ struct ggml_tensor * ggml_diag_mask_inf(
     return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
 }

-
 struct ggml_tensor * ggml_diag_mask_inf_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6680,7 +6880,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(

     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-    int32_t params[] = { n_past
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));

     result->op = GGML_OP_DIAG_MASK_ZERO;
@@ -7097,11 +7297,13 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
     };

     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+
+    ggml_set_op_params_i32(result, 0, stride);
+
     result->op = GGML_OP_CONV_TRANSPOSE_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = ggml_new_i32(ctx, stride);

     return result;
 }
@@ -9446,6 +9648,8 @@ static void ggml_compute_forward_div_f32(


 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
@@ -10752,7 +10956,8 @@ static void ggml_compute_forward_rms_norm_back_f32(

     GGML_TENSOR_BINARY_OP_LOCALS;

-
+    float eps;
+    memcpy(&eps, dst->op_params, sizeof(float));

     // TODO: optimize
     for (int64_t i03 = 0; i03 < ne03; i03++) {
@@ -11930,8 +12135,8 @@ static void ggml_compute_forward_diag_mask_f32(
     const int ith = params->ith;
     const int nth = params->nth;

-    const int n_past =
-    const bool inplace =
+    const int n_past = ((int32_t *) dst->op_params)[0];
+    const bool inplace = src0->data == dst->data;

     GGML_ASSERT(n_past >= 0);

@@ -12142,6 +12347,7 @@ static void ggml_compute_forward_soft_max_back_f32(
     // dx = J * dy
     // dxk = sum_i(Jki * dyi)
     // dxk = sum_i(-yk*yi * dyi) - (-yk*yk)*dyk + (yk - yk*yk)*dyk
+    // dxk = sum_i(-yk*yi * dyi) + yk*yk*dyk + yk*dyk - yk*yk*dyk
     // dxk = sum_i(-yk*yi * dyi) + yk*dyk
     // dxk = -yk * sum_i(yi * dyi) + yk*dyk
     // dxk = -yk * dot(y, dy) + yk*dyk
@@ -13497,7 +13703,6 @@ static void ggml_compute_forward_conv_transpose_2d(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -13557,7 +13762,7 @@ static void ggml_compute_forward_conv_transpose_2d(
         return;
     }

-    const int32_t stride = (
+    const int32_t stride = ggml_get_op_params_i32(dst, 0);

     // total patches in dst
     const int np = ne2;
@@ -13570,7 +13775,7 @@ static void ggml_compute_forward_conv_transpose_2d(
     const int ip1 = MIN(ip0 + dp, np);

     ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-    ggml_fp16_t * const wdata_src =
+    ggml_fp16_t * const wdata_src = wdata + nk;

     for (int i2 = ip0; i2 < ip1; i2++) { // Cout
         float * dst_data = (float *)((char *) dst->data + i2*nb2);
@@ -13582,9 +13787,8 @@ static void ggml_compute_forward_conv_transpose_2d(
                 for (int i00 = 0; i00 < ne00; i00++) {
                     float v = 0;
                     ggml_vec_dot_f16(ne03, &v,
-
-
-
+                            wdata_src + i1n,
+                            wdata_kernel + i01*ne00*ne03 + i00*ne03);
                     dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
                 }
             }
@@ -13934,7 +14138,7 @@ static void ggml_compute_forward_flash_attn_f32(
                 vvexpf(S, S, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -13944,9 +14148,13 @@ static void ggml_compute_forward_flash_attn_f32(
                         if (SS[j] == -INFINITY) {
                             SS[j] = 0.0f;
                         } else {
+#ifndef GGML_FLASH_ATTN_EXP_FP16
+                            const float val = expf(SS[j] - max);
+#else
                             ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
                             memcpy(&scvt[j], &s, sizeof(uint16_t));
                             const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+#endif
                             sump[j] += (ggml_float)val;
                             SS[j] = val;
                         }
@@ -14524,7 +14732,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
                 vvexpf(SM, SM, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -14535,9 +14743,13 @@ static void ggml_compute_forward_flash_attn_back_f32(
                         if (SR[j] == -INFINITY) {
                             SW[j] = 0.0f;
                         } else {
+#ifndef GGML_FLASH_ATTN_EXP_FP16
+                            const float val = expf(SR[j] - max);
+#else
                             ggml_fp16_t s = GGML_FP32_TO_FP16(SR[j] - max);
                             memcpy(&scvt[j], &s, sizeof(uint16_t));
                             const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+#endif
                             sump[j] += (ggml_float)val;
                             SW[j] = val;
                         }
@@ -15275,6 +15487,8 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
     const int nc = src0->ne[0];
     const int nr = ggml_nrows(src0);

+    GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
+
     if (params->type == GGML_TASK_INIT) {
         if (ith == 0) {
             memset(sums, 0, sizeof(float) * (nth + nth * nc));
@@ -15286,7 +15500,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
         if (ith == 0) {
             float * dp = (float *) dst->data;
             ggml_vec_sum_f32(nth, dp, sums);
-            dp[0] *= -1.0f;
+            dp[0] *= -1.0f / (float) nr;
         }
         return;
     }
@@ -15303,7 +15517,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
     for (int i1 = ir0; i1 < ir1; i1++) {
         float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]);
         float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]);
-        float * st = (float *) params->wdata + nth + ith*nc;
+        float * st = ((float *) params->wdata) + nth + ith*nc;

 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
@@ -15318,15 +15532,19 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
                 } else {
-
+#ifndef GGML_CROSS_ENTROPY_EXP_FP16
+                    const float s = s0[i] - max;
+                    const float val = expf(s);
+#else
                     ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
                     memcpy(&scvt, &s, sizeof(scvt));
                     const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+#endif
                     sum += (ggml_float)val;
                     st[i] = val;
                 }
@@ -15342,7 +15560,9 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
         ggml_vec_log_f32(nc, st, st);
         ggml_vec_mul_f32(nc, st, st, s1);

-
+        float st_sum = 0;
+        ggml_vec_sum_f32(nc, &st_sum, st);
+        sums[ith] += st_sum;

 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
@@ -15392,7 +15612,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }

-    const
+    const double eps = 1e-9;

     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
@@ -15411,7 +15631,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         float * ds0 = (float *)((char *) dst->data + i1*dst->nb[1]);
         float * s0 = (float *)((char *) src0->data + i1*src0->nb[1]);
         float * s1 = (float *)((char *) src1->data + i1*src1->nb[1]);
-        float * sm = (float *) params->wdata + ith*nc;

 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
@@ -15420,54 +15639,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             assert(!isnan(s1[i]));
         }
 #endif
-        // step by step explanation:
-        {
-            //float * sums = (float *) params->wdata;
-
-            // forward pass with annotated gradients from backward pass
-            // (built by going in reverse operation order, adding to gradients of current operation args)
-            // st0 = exp(s0-max(s0)) grad[st0] = grad[st1]*(1.0 - eps)/sum
-            // from softmax_back: grad[s0] = st1_k * (grad[st1]_k - dot(st1, grad[st1]))
-            // ggml_vec_scale_f32(nc, st, sum); // st1 = st0*/sum = softmax(s0) grad[st1] = grad[st2]*(1.0 - eps)
-            // ggml_vec_scale_f32(nc, st, (1.0f - eps)); // st2 = st1*(1.0 - eps) grad[st2] = grad[st3]
-            // ggml_vec_add1_f32(nc, st, st, eps); // st3 = st2 + eps grad[st3] = grad[st4]/st3
-            // ggml_vec_log_f32(nc, st, st); // st4 = log(st3) grad[st4] = grad[st5] * s1
-            // ggml_vec_mul_f32(nc, st, st, s1); // st5 = st4 * s1 grad[st5] = grad[sums[ith]]
-            // ggml_vec_sum_f32(nc, sums + ith, st); // sums[ith] = st5 grad[sums[ith]] = grad[cross_entropy_loss] = -grad[cel]
-
-            // substitute into grad[st1], because we can reuse softmax_back from this point on
-            // grad[st1] = -grad[cel]*s1*(1.0 - eps)/(eps + softmax(s0)*(1.0 - eps))
-            // postorder:
-            // grad[st1] := softmax(s0)
-            // grad[st1] := grad[st1]*(1.0 - eps)
-            // grad[st1] := grad[st1] + eps
-            // grad[st1] := s1 / grad[st1]
-            // grad[st1] := grad[st1]*(1.0-eps)*-grad[cel]
-
-            // src0 gradients by going through softmax_back
-            // grad[s0] = st1_k * (grad[st1]_k - dot(st1, grad[st1]))
-            // from softmax_back:
-            // dxk = yk * (dyk - dot(y, dy))
-            // dot_y_dy := dot(y, dy)
-            // dx := dy
-            // dx := dx - dot_y_dy
-            // dx := dx * y
-            // postorder:
-            // dot_st1_dst1 := dot(st1, grad[st1])
-            // grad[s0] := grad[st1]
-            // grad[s0] := grad[s0] - dot_st1_dst1
-            // grad[s0] := grad[s0] * st1
-
-            // prepend postorder from grad[st1] directly using grad[s0] as memory location, as we will grad[s0] := grad[st1]
-            // sm := softmax(s0)
-            // grad[s0] := sm*(1.0 - eps)
-            // grad[s0] := grad[s0] + eps
-            // grad[s0] := s1 / grad[s0]
-            // grad[s0] := grad[s0]*(1.0-eps)*-grad[cel]
-            // dot_st1_dst1 := dot(sm, grad[s0])
-            // grad[s0] := grad[s0] - dot_st1_dst1
-            // grad[s0] := grad[s0] * sm
-        }

         // soft_max
         ggml_float sum = 0.0;
@@ -15475,39 +15646,37 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
-
+                    ds0[i] = 0.0f;
                 } else {
-
+#ifndef GGML_CROSS_ENTROPY_EXP_FP16
+                    const float s = s0[i] - max;
+                    const float val = expf(s);
+#else
                     ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
                     memcpy(&scvt, &s, sizeof(scvt));
                     const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+#endif
                     sum += (ggml_float)val;
-
+                    ds0[i] = val;
                 }
             }

             assert(sum > 0.0);
-            sum = 1.0/sum;
+            sum = (1.0 - eps)/sum;
         }

-
-        ggml_vec_scale_f32(nc,
-
-
-
-
-        ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]);
-        ggml_vec_dot_f32 (nc, &dot_st1_dst1, sm, ds0);
-        ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1);
-        ggml_vec_mul_f32 (nc, ds0, ds0, sm);
+        // grad(src0) = (softmax(src0) - src1) * grad(cross_entropy_loss(src0, src1)) / nr
+        ggml_vec_scale_f32(nc, ds0, sum);
+        ggml_vec_add1_f32(nc, ds0, ds0, eps);
+        ggml_vec_sub_f32(nc, ds0, ds0, s1);
+        ggml_vec_scale_f32(nc, ds0, d[0] / (float) nr);
+

 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
-            assert(!isnan(sm[i]));
-            assert(!isinf(sm[i]));
             assert(!isnan(ds0[i]));
             assert(!isinf(ds0[i]));
         }
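Note (not part of the diff): together with the 1.0f / (float) nr scaling added to the forward pass, the rewritten backward pass above computes the standard softmax cross-entropy gradient. Up to the small eps smoothing term, with p = softmax(s0):

    dL/ds0 = (p - s1) * d[0] / nr

where d[0] is the incoming gradient of the scalar loss and nr is the number of rows, which is exactly the "(softmax(src0) - src1) * grad(...) / nr" comment carried in the new code.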
@@ -15731,7 +15900,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor
+                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_POOL_1D:
             {
@@ -16062,9 +16231,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
+                    float eps;
+                    memcpy(&eps, tensor->op_params, sizeof(float));
+
                     src0->grad = ggml_add_impl(ctx,
                             src0->grad,
-                            ggml_rms_norm_back(ctx, src0, tensor->grad),
+                            ggml_rms_norm_back(ctx, src0, tensor->grad, eps),
                             inplace);
                 }
             } break;
@@ -16832,9 +17004,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
     return result;
 }

-
-    struct ggml_cgraph result = *gf;
-
+void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
     GGML_ASSERT(gf->n_nodes > 0);

     // if we are keeping the gradient graph, we have to detach the gradient nodes from the original graph
@@ -16858,15 +17028,19 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
         }
     }

-    for (int i =
+    for (int i = 0; i < gf->n_nodes; i++) {
         struct ggml_tensor * node = gf->nodes[i];

         if (node->is_param) {
             GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
-            ggml_build_forward_expand(
+            ggml_build_forward_expand(gb, node->grad);
         }
     }
+}

+struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
+    struct ggml_cgraph result = *gf;
+    ggml_build_backward_expand(ctx, gf, &result, keep);
     return result;
 }

@@ -17542,10 +17716,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
         case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
             {
                 n_tasks = n_threads;
-
-                size_t cur = ggml_type_size(node->type)*node->src[0]->ne[0]*n_tasks;
-
-                work_size = MAX(work_size, cur);
             } break;
         case GGML_OP_NONE:
             {
@@ -18423,14 +18593,16 @@ static enum ggml_opt_result ggml_opt_adam(
         struct ggml_opt_params params,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
-        struct ggml_cgraph * gb
+        struct ggml_cgraph * gb,
+        ggml_opt_callback callback,
+        void * callback_data) {
     GGML_ASSERT(ggml_is_scalar(f));

     // these will store the parameters we want to optimize
     struct ggml_tensor * ps[GGML_MAX_PARAMS];

     int np = 0;
-
+    int64_t nx = 0;
     for (int i = 0; i < gf->n_nodes; ++i) {
         if (gf->nodes[i]->is_param) {
             GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
@@ -18449,31 +18621,32 @@ static enum ggml_opt_result ggml_opt_adam(
     }

     // constants
-
-    const float
-    const float
+    float sched = params.adam.sched;
+    const float alpha = params.adam.alpha;
+    const float decay = params.adam.decay * alpha;
     const float beta1 = params.adam.beta1;
     const float beta2 = params.adam.beta2;
     const float eps = params.adam.eps;
+    const float gclip = params.adam.gclip;
+    const int decay_min_ndim = params.adam.decay_min_ndim;

-    float * x = opt->adam.x->data; // view of the parameters
-    float * g1 = opt->adam.g1->data; // gradient
-    float * g2 = opt->adam.g2->data; // gradient squared
     float * m = opt->adam.m->data; // first moment
     float * v = opt->adam.v->data; // second moment
-    float * mh = opt->adam.mh->data; // first moment hat
-    float * vh = opt->adam.vh->data; // second moment hat

     float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values

-
-
+    if (callback) {
+        callback(callback_data, &sched);
+    }

     // compute the function value
     ggml_graph_reset (gf);
     ggml_set_f32 (f->grad, 1.0f);

-
+    struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
+    ggml_graph_compute(gb, &cplan);

     opt->adam.fx_prev = ggml_get_f32_1d(f, 0);
     opt->adam.fx_best = opt->adam.fx_prev;
@@ -18481,6 +18654,9 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18481
18654
|
pf[opt->iter % params.past] = opt->adam.fx_prev;
|
18482
18655
|
}
|
18483
18656
|
|
18657
|
+
opt->loss_before = opt->adam.fx_prev;
|
18658
|
+
opt->loss_after = opt->adam.fx_prev;
|
18659
|
+
|
18484
18660
|
// initialize
|
18485
18661
|
if (opt->just_initialized) {
|
18486
18662
|
opt->adam.n_no_improvement = 0;
|
@@ -18513,50 +18689,55 @@ static enum ggml_opt_result ggml_opt_adam(
         UNUSED(t_start_cpu);

         {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            float gnorm = 1.0f;
+            if (gclip > 0.0f) {
+                // gradient clipping
+                ggml_float sum = 0.0;
+                for (int p = 0; p < np; ++p) {
+                    const int64_t ne = ggml_nelements(ps[p]);
+                    for (int64_t j = 0; j < ne; ++j) {
+                        float g = ggml_get_f32_1d(ps[p]->grad, j);
+                        sum += (ggml_float)(g*g);
+                    }
+                }
+                ggml_float norm = sqrt(sum);
+                if (norm > (ggml_float) gclip) {
+                    gnorm = (float) ((ggml_float) gclip / norm);
+                }
+            }
+            const float beta1h = alpha*sched/(1.0f - powf(beta1, opt->iter));
+            const float beta2h = 1.0f/(1.0f - powf(beta2, opt->iter));
+            int64_t i = 0;
+            for (int p = 0; p < np; ++p) {
+                const int64_t ne = ggml_nelements(ps[p]);
+                const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
+                for (int64_t j = 0; j < ne; ++j) {
+                    float x = ggml_get_f32_1d(ps[p], j);
+                    float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
+                    m[i] = m[i]*beta1 + g*(1.0f - beta1);
+                    v[i] = v[i]*beta2 + g*g*(1.0f - beta2);
+                    float mh = m[i]*beta1h;
+                    float vh = v[i]*beta2h;
+                    vh = sqrtf(vh) + eps;
+                    x = x*(1.0f - p_decay) - mh/vh;
+                    ggml_set_f32_1d(ps[p], j, x);
+                    ++i;
+                }
+            }
+        }

-
-
+        if (callback) {
+            callback(callback_data, &sched);
         }

         ggml_graph_reset (gf);
         ggml_set_f32 (f->grad, 1.0f);

-
+        ggml_graph_compute(gb, &cplan);

         const float fx = ggml_get_f32_1d(f, 0);
+        opt->loss_after = fx;
+

         // check convergence
         if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
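The large hunk above swaps the old ggml_vec_* based Adam step for a scalar loop that fuses global-norm gradient clipping (gclip), bias-corrected first/second moments, and decoupled per-tensor weight decay, which is why the separate mh/vh scratch tensors disappear from ggml_opt_init further down. A self-contained sketch of the same update rule on plain arrays (illustrative only; assumes <math.h> and <stdint.h>, and unlike ggml it does not gate decay on decay_min_ndim):

    // One Adam step as performed by the loop above:
    //   m <- beta1*m + (1-beta1)*g          v <- beta2*v + (1-beta2)*g^2
    //   x <- x*(1 - decay*sched) - (alpha*sched) * mhat / (sqrt(vhat) + eps)
    // where g is pre-scaled by gnorm = min(1, gclip/||g||_2).
    static void adam_step(float * x, const float * grad, float * m, float * v, int64_t n,
                          float alpha, float beta1, float beta2, float eps,
                          float decay, float gclip, float sched, int iter) {
        float gnorm = 1.0f;
        if (gclip > 0.0f) {
            double sum = 0.0;
            for (int64_t i = 0; i < n; ++i) { sum += (double) grad[i]*grad[i]; }
            const double norm = sqrt(sum);
            if (norm > gclip) { gnorm = (float) (gclip/norm); }
        }
        const float beta1h = alpha*sched/(1.0f - powf(beta1, iter)); // step size with bias correction folded in
        const float beta2h =       1.0f/(1.0f - powf(beta2, iter));
        for (int64_t i = 0; i < n; ++i) {
            const float g = grad[i]*gnorm;
            m[i] = m[i]*beta1 + g*(1.0f - beta1);
            v[i] = v[i]*beta2 + g*g*(1.0f - beta2);
            x[i] = x[i]*(1.0f - decay*sched) - (m[i]*beta1h)/(sqrtf(v[i]*beta2h) + eps);
        }
    }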
@@ -18625,7 +18806,6 @@ struct ggml_lbfgs_iteration_data {
 };

 static enum ggml_opt_result linesearch_backtracking(
-        struct ggml_context * ctx,
         const struct ggml_opt_params * params,
         int nx,
         float * x,
@@ -18637,8 +18817,11 @@ static enum ggml_opt_result linesearch_backtracking(
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
         struct ggml_cgraph * gb,
+        struct ggml_cplan * cplan,
         const int np,
-        struct ggml_tensor * ps[]
+        struct ggml_tensor * ps[],
+        ggml_opt_callback callback,
+        void * callback_data) {
     int count = 0;

     float width = 0.0f;
@@ -18667,6 +18850,12 @@ static enum ggml_opt_result linesearch_backtracking(
     dgtest = params->lbfgs.ftol*dginit;

     while (true) {
+        if (callback) {
+            // LBFG-S does not support learning rate -> ignore learning schedule
+            float sched = 0;
+            callback(callback_data, &sched);
+        }
+
         ggml_vec_cpy_f32(nx, x, xp);
         ggml_vec_mad_f32(nx, x, d, *step);

@@ -18677,7 +18866,7 @@ static enum ggml_opt_result linesearch_backtracking(
         ggml_graph_reset (gf);
         ggml_set_f32 (f->grad, 1.0f);

-
+        ggml_graph_compute(gb, cplan);

         ggml_opt_get_grad(np, ps, g);

@@ -18737,7 +18926,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         struct ggml_opt_params params,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
-        struct ggml_cgraph * gb
+        struct ggml_cgraph * gb,
+        ggml_opt_callback callback,
+        void * callback_data) {
     if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
         params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
         if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
@@ -18769,6 +18960,10 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         opt->iter = iter;
     }

+    struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
+
     float * x = opt->lbfgs.x->data; // current parameters
     float * xp = opt->lbfgs.xp->data; // previous parameters
     float * g = opt->lbfgs.g->data; // current gradient
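As in the Adam path, L-BFGS now plans the backward graph once and reuses the plan for every evaluation; the scratch buffer is carved out of the ggml context with ggml_new_object, which is internal to ggml.c. Outside the library, the same flow would roughly look like this (a sketch with a plain heap allocation; assumes <stdlib.h>, and the thread count is arbitrary):

    // Plan once, then reuse the plan for repeated ggml_graph_compute calls.
    struct ggml_cplan cplan = ggml_graph_plan(gb, /*n_threads=*/4);
    uint8_t * work = NULL;
    if (cplan.work_size > 0) {
        work = malloc(cplan.work_size);  // shared scratch for all threads
        cplan.work_data = work;
    }
    ggml_graph_compute(gb, &cplan);
    free(work);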
@@ -18790,6 +18985,12 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     float * lm_s = opt->lbfgs.lms->data;
     float * lm_y = opt->lbfgs.lmy->data;

+    if (callback) {
+        // LBFG-S does not support learning rate -> ignore learning schedule
+        float sched = 0;
+        callback(callback_data, &sched);
+    }
+
     // evaluate the function value and its gradient
     {
         ggml_opt_set_params(np, ps, x);
@@ -18797,11 +18998,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         ggml_graph_reset (gf);
         ggml_set_f32 (f->grad, 1.0f);

-
+        ggml_graph_compute(gb, &cplan);

         ggml_opt_get_grad(np, ps, g);

         fx = ggml_get_f32_1d(f, 0);
+
+        opt->loss_before = fx;
+        opt->loss_after = fx;
     }

     // search direction = -gradient
@@ -18856,7 +19060,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         ggml_vec_cpy_f32(nx, xp, x);
         ggml_vec_cpy_f32(nx, gp, g);

-        ls = linesearch_backtracking(
+        ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gf, gb, &cplan, np, ps, callback, callback_data);

         if (ls < 0) {
             // linesearch failed - go back to the previous point and return
@@ -18866,6 +19070,8 @@ static enum ggml_opt_result ggml_opt_lbfgs(
             return ls;
         }

+        opt->loss_after = fx;
+
         ggml_vec_norm_f32(nx, &xnorm, x);
         ggml_vec_norm_f32(nx, &gnorm, g);

@@ -18923,7 +19129,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         // ys = y^t \cdot s -> 1 / \rho.
         // yy = y^t \cdot y.
         //
-        ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]
+        ggml_vec_dot_f32(nx, &ys, &lm_y[end[0]*nx], &lm_s[end[0]*nx]);
         ggml_vec_dot_f32(nx, &yy, &lm_y[end[0]*nx], &lm_y[end[0]*nx]);

         lm_ys[end[0]] = ys;
@@ -18986,13 +19192,15 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
                     .adam = {
                         .n_iter = 10000,
                         .sched = 1.000f,
-                        .decay = 0.
+                        .decay = 0.0f,
+                        .decay_min_ndim = 2,
                         .alpha = 0.001f,
                         .beta1 = 0.9f,
                         .beta2 = 0.999f,
                         .eps = 1e-8f,
                         .eps_f = 1e-5f,
                         .eps_g = 1e-3f,
+                        .gclip = 0.0f,
                     },
                 };
             } break;
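The Adam defaults gain three knobs: decay is now an explicit 0.0f, decay_min_ndim restricts weight decay to higher-dimensional tensors, and gclip enables gradient clipping when non-zero. Turning them on is a matter of editing the returned struct; the values below are arbitrary examples:

    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.adam.gclip          = 1.0f; // clip gradients to global L2 norm 1.0 (0.0f = off)
    params.adam.decay          = 0.1f; // decoupled weight decay factor
    params.adam.decay_min_ndim = 2;    // skip decay for 1-D tensors such as biases/norms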
@@ -19042,23 +19250,13 @@ GGML_API void ggml_opt_init(
     switch (opt->params.type) {
         case GGML_OPT_ADAM:
             {
-                opt->adam.x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.g1 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.g2 = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
                 opt->adam.m = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
                 opt->adam.v = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.mh = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
-                opt->adam.vh = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nx);
                 opt->adam.pf = params.past > 0
                     ? ggml_new_tensor_1d(ctx, GGML_TYPE_F32, params.past)
                     : NULL;
-                ggml_set_zero(opt->adam.x);
-                ggml_set_zero(opt->adam.g1);
-                ggml_set_zero(opt->adam.g2);
                 ggml_set_zero(opt->adam.m);
                 ggml_set_zero(opt->adam.v);
-                ggml_set_zero(opt->adam.mh);
-                ggml_set_zero(opt->adam.vh);
                 if (opt->adam.pf) {
                     ggml_set_zero(opt->adam.pf);
                 }
@@ -19142,7 +19340,7 @@ enum ggml_opt_result ggml_opt_resume(
     *gf = ggml_build_forward (f);
     *gb = ggml_build_backward(ctx, gf, true);

-    return ggml_opt_resume_g(ctx, opt, f, gf, gb);
+    return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
 }

 enum ggml_opt_result ggml_opt_resume_g(
@@ -19150,7 +19348,9 @@ enum ggml_opt_result ggml_opt_resume_g(
         struct ggml_opt_context * opt,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
-        struct ggml_cgraph * gb
+        struct ggml_cgraph * gb,
+        ggml_opt_callback callback,
+        void * callback_data) {

     // build forward + backward compute graphs
     enum ggml_opt_result result = GGML_OPT_OK;
@@ -19158,11 +19358,11 @@ enum ggml_opt_result ggml_opt_resume_g(
     switch (opt->params.type) {
         case GGML_OPT_ADAM:
             {
-                result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb);
+                result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
             } break;
         case GGML_OPT_LBFGS:
             {
-                result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb);
+                result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
             } break;
     }

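ggml_opt_resume_g now forwards the callback and its user pointer to ggml_opt_adam / ggml_opt_lbfgs, while ggml_opt_resume keeps the old behaviour by passing NULLs. A hedged end-to-end sketch, reusing a schedule callback like the one outlined earlier; f, gf, gb and the parameter count nx are assumed to come from the caller's graph setup:

    struct ggml_opt_context opt;
    struct ggml_opt_params  params = ggml_opt_default_params(GGML_OPT_ADAM);
    ggml_opt_init(ctx, &opt, params, nx);

    int n_calls = 0; // state consumed by warmup_callback from the earlier sketch
    enum ggml_opt_result res = ggml_opt_resume_g(ctx, &opt, f, gf, gb, warmup_callback, &n_calls);
    // opt.loss_before / opt.loss_after now report the loss around this run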
@@ -19394,7 +19594,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
 ////////////////////////////////////////////////////////////////////////////////

 struct gguf_str {
-
+    uint64_t n; // GGUFv2
     char * data;
 };

@@ -19408,9 +19608,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_FLOAT32] = sizeof(float),
     [GGUF_TYPE_BOOL] = sizeof(bool),
     [GGUF_TYPE_STRING] = sizeof(struct gguf_str),
+    [GGUF_TYPE_UINT64] = sizeof(uint64_t),
+    [GGUF_TYPE_INT64] = sizeof(int64_t),
+    [GGUF_TYPE_FLOAT64] = sizeof(double),
     [GGUF_TYPE_ARRAY] = 0, // undefined
 };
-static_assert(GGUF_TYPE_COUNT ==
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

 static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_UINT8] = "u8",
@@ -19423,8 +19626,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
     [GGUF_TYPE_BOOL] = "bool",
     [GGUF_TYPE_STRING] = "str",
     [GGUF_TYPE_ARRAY] = "arr",
+    [GGUF_TYPE_UINT64] = "u64",
+    [GGUF_TYPE_INT64] = "i64",
+    [GGUF_TYPE_FLOAT64] = "f64",
 };
-static_assert(GGUF_TYPE_COUNT ==
+static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");

 union gguf_value {
     uint8_t uint8;
@@ -19434,6 +19640,9 @@ union gguf_value {
     uint32_t uint32;
     int32_t int32;
     float float32;
+    uint64_t uint64;
+    int64_t int64;
+    double float64;
     bool bool_;

     struct gguf_str str;
@@ -19441,7 +19650,7 @@ union gguf_value {
     struct {
         enum gguf_type type;

-
+        uint64_t n; // GGUFv2
         void * data;
     } arr;
 };
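With UINT64, INT64 and FLOAT64 added, the metadata type count becomes 13, which is what both updated static_asserts pin down. Assuming the new enumerators are appended after GGUF_TYPE_ARRAY (which the designated initializers above imply, and which keeps the numeric type ids of existing files stable), the enum reads roughly:

    // Sketch of enum gguf_type after this change (ids of the new entries assumed):
    enum gguf_type {
        GGUF_TYPE_UINT8   = 0,  GGUF_TYPE_INT8   = 1,
        GGUF_TYPE_UINT16  = 2,  GGUF_TYPE_INT16  = 3,
        GGUF_TYPE_UINT32  = 4,  GGUF_TYPE_INT32  = 5,
        GGUF_TYPE_FLOAT32 = 6,  GGUF_TYPE_BOOL   = 7,
        GGUF_TYPE_STRING  = 8,  GGUF_TYPE_ARRAY  = 9,
        GGUF_TYPE_UINT64  = 10, GGUF_TYPE_INT64  = 11, GGUF_TYPE_FLOAT64 = 12,
        GGUF_TYPE_COUNT, // 13, marks the end of the enum
    };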
@@ -19449,8 +19658,6 @@ union gguf_value {
 struct gguf_kv {
     struct gguf_str key;

-    uint32_t n_bytes; // TODO: is this actually needed?
-
     enum gguf_type type;
     union gguf_value value;
 };
@@ -19458,15 +19665,15 @@ struct gguf_kv {
 struct gguf_header {
     uint32_t magic;
     uint32_t version;
-
-
+    uint64_t n_tensors; // GGUFv2
+    uint64_t n_kv; // GGUFv2
 };

 struct gguf_tensor_info {
     struct gguf_str name;

     uint32_t n_dims;
-
+    uint64_t ne[GGML_MAX_DIMS];

     enum ggml_type type;

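The widened counts and tensor shapes are what make this GGUFv2: n_tensors, n_kv, string lengths, array lengths and tensor dimensions all become 64-bit, while v1 files keep 32-bit fields and are widened on load (see gguf_init_from_file below). As a sketch of the fixed on-disk prefix implied by struct gguf_header:

    // Sketch only; v1 fields are read into temporaries and widened by the loader below.
    struct gguf_header_v1 { uint32_t magic; uint32_t version /* 1 */; uint32_t n_tensors; uint32_t n_kv; };
    struct gguf_header_v2 { uint32_t magic; uint32_t version /* 2 */; uint64_t n_tensors; uint64_t n_kv; };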
@@ -19497,19 +19704,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
     return n == size;
 }

-
+// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
     p->n = 0;
     p->data = NULL;

     bool ok = true;

-    // TODO: how to avoid mallocs for strings?
     ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
     ok = ok && gguf_fread_el(file, p->data, p->n, offset);

     return ok;
 }

+static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
+    p->n = 0;
+    p->data = NULL;
+
+    bool ok = true;
+
+    uint32_t n = 0;
+    ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
+    ok = ok && gguf_fread_el(file, p->data, p->n, offset);
+
+    return ok;
+}
+
 struct gguf_context * gguf_init_empty(void) {
     struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));

@@ -19565,8 +19785,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     ctx->data = NULL;

     ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
-
-
+
+    if (ctx->header.version == 1) {
+        // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+        uint32_t n_tensors = 0;
+        uint32_t n_kv = 0;
+
+        ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
+        ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
+
+        ctx->header.n_tensors = n_tensors;
+        ctx->header.n_kv = n_kv;
+    } else {
+        ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
+        ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
+    }

     if (!ok) {
         fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -19576,18 +19809,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         }
     }

+    // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+    bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
+    if (ctx->header.version == 1) {
+        gguf_fread_str = gguf_fread_str_v1;
+    }
+
     // read the kv pairs
     {
-        ctx->kv =
+        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));

         for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];

             //fprintf(stderr, "%s: reading kv %d\n", __func__, i);

-            ok = ok && gguf_fread_str(file, &kv->key,
-
-            ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
+            ok = ok && gguf_fread_str(file, &kv->key, &offset);
+            ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);

             //fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);

@@ -19599,12 +19837,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
                 case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
                 case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
+                case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
+                case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
+                case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
                 case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
                 case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
                 case GGUF_TYPE_ARRAY:
                     {
                         ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
-
+
+                        if (ctx->header.version == 1) {
+                            // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+                            uint32_t n = 0;
+                            ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
+                            kv->value.arr.n = n;
+                        } else {
+                            ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
+                        }

                         switch (kv->value.arr.type) {
                             case GGUF_TYPE_UINT8:
@@ -19614,6 +19863,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                             case GGUF_TYPE_UINT32:
                             case GGUF_TYPE_INT32:
                             case GGUF_TYPE_FLOAT32:
+                            case GGUF_TYPE_UINT64:
+                            case GGUF_TYPE_INT64:
+                            case GGUF_TYPE_FLOAT64:
                             case GGUF_TYPE_BOOL:
                                 {
                                     kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -19648,7 +19900,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the tensor infos
     {
-        ctx->infos =
+        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));

         for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
@@ -19660,7 +19912,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             ok = ok && gguf_fread_str(file, &info->name, &offset);
             ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
             for (uint32_t j = 0; j < info->n_dims; ++j) {
-
+                if (ctx->header.version == 1) {
+                    // NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
+                    uint32_t t = 0;
+                    ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
+                    info->ne[j] = t;
+                } else {
+                    ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+                }
             }
             ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
             ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
@@ -19842,7 +20101,7 @@ void gguf_free(struct gguf_context * ctx) {
             }
         }

-
+        free(ctx->kv);
     }

     if (ctx->infos) {
@@ -19854,7 +20113,7 @@ void gguf_free(struct gguf_context * ctx) {
             }
         }

-
+        free(ctx->infos);
     }

     GGML_ALIGNED_FREE(ctx);
@@ -19954,6 +20213,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
     return ctx->kv[i].value.float32;
 }

+uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
+    return ctx->kv[i].value.uint64;
+}
+
+int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
+    return ctx->kv[i].value.int64;
+}
+
+double gguf_get_val_f64(struct gguf_context * ctx, int i) {
+    return ctx->kv[i].value.float64;
+}
+
 bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
     return ctx->kv[i].value.bool_;
 }
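A hedged example of reading the new 64-bit values back through the public API; gguf_find_key and gguf_get_kv_type already exist in the gguf interface, and the key name here is purely illustrative:

    const int i = gguf_find_key(ctx, "example.total_tokens"); // hypothetical key
    if (i >= 0 && gguf_get_kv_type(ctx, i) == GGUF_TYPE_UINT64) {
        const uint64_t total = gguf_get_val_u64(ctx, i);
        printf("total tokens: %llu\n", (unsigned long long) total);
    }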
@@ -20000,7 +20271,7 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
     const int n_kv = gguf_get_n_kv(ctx);

     ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
-    ctx->kv[n_kv].key.n = strlen(key)
+    ctx->kv[n_kv].key.n = strlen(key);
     ctx->kv[n_kv].key.data = strdup(key);
     ctx->header.n_kv++;

@@ -20056,6 +20327,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
     ctx->kv[idx].value.float32 = val;
 }

+void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type = GGUF_TYPE_UINT64;
+    ctx->kv[idx].value.uint64 = val;
+}
+
+void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type = GGUF_TYPE_INT64;
+    ctx->kv[idx].value.int64 = val;
+}
+
+void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
+    const int idx = gguf_get_or_add_key(ctx, key);
+
+    ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
+    ctx->kv[idx].value.float64 = val;
+}
+
 void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
     const int idx = gguf_get_or_add_key(ctx, key);

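And the writing side, for example when assembling metadata for a GGUF file in memory (key names again illustrative):

    struct gguf_context * gctx = gguf_init_empty();
    gguf_set_val_u64(gctx, "example.file_size_bytes", 7340032ULL);
    gguf_set_val_i64(gctx, "example.created_unix_time", 1693526400LL);
    gguf_set_val_f64(gctx, "example.rope_freq_scale", 1.25);
    // ... add tensors / write the file, then:
    gguf_free(gctx);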
@@ -20067,7 +20359,7 @@ void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char *
     const int idx = gguf_get_or_add_key(ctx, key);

     ctx->kv[idx].type = GGUF_TYPE_STRING;
-    ctx->kv[idx].value.str.n = strlen(val)
+    ctx->kv[idx].value.str.n = strlen(val);
     ctx->kv[idx].value.str.data = strdup(val);
 }

@@ -20090,7 +20382,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char**
     ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
     for (int i = 0; i < n; i++) {
         struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
-        str->n = strlen(data[i])
+        str->n = strlen(data[i]);
         str->data = strdup(data[i]);
     }
 }
@@ -20106,6 +20398,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
             case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
             case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
             case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
+            case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
+            case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
+            case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
             case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
             case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
             case GGUF_TYPE_ARRAY:
@@ -20134,7 +20429,7 @@ void gguf_add_tensor(
     const int idx = ctx->header.n_tensors;
     ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));

-    ctx->infos[idx].name.n = strlen(tensor->name)
+    ctx->infos[idx].name.n = strlen(tensor->name);
     ctx->infos[idx].name.data = strdup(tensor->name);

     for (int i = 0; i < GGML_MAX_DIMS; ++i) {
@@ -20267,6 +20562,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
             case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
             case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
             case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
+            case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
+            case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
+            case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
             case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
             case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
             case GGUF_TYPE_ARRAY:
@@ -20282,6 +20580,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
                     case GGUF_TYPE_UINT32:
                     case GGUF_TYPE_INT32:
                     case GGUF_TYPE_FLOAT32:
+                    case GGUF_TYPE_UINT64:
+                    case GGUF_TYPE_INT64:
+                    case GGUF_TYPE_FLOAT64:
                     case GGUF_TYPE_BOOL:
                         {
                             gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@@ -20516,6 +20817,14 @@ int ggml_cpu_has_sse3(void) {
 #endif
 }

+int ggml_cpu_has_ssse3(void) {
+#if defined(__SSSE3__)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_vsx(void) {
 #if defined(__POWER9_VECTOR__)
     return 1;