llama_cpp 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -0
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +691 -93
- data/ext/llama_cpp/src/ggml-metal.m +535 -54
- data/ext/llama_cpp/src/ggml-metal.metal +1497 -169
- data/ext/llama_cpp/src/ggml-quants.c +2 -2
- data/ext/llama_cpp/src/ggml.c +325 -159
- data/ext/llama_cpp/src/ggml.h +34 -13
- data/ext/llama_cpp/src/llama.cpp +195 -35
- data/ext/llama_cpp/src/llama.h +1 -1
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +2 -2
@@ -3114,7 +3114,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
 
 size_t vl = __riscv_vsetvl_e8m1(qk/2);
 
-// These [… remainder of the removed comment was lost in extraction]
+// These temporary registers are for masking and shift operations
 vuint32m2_t vt_1 = __riscv_vid_v_u32m2(vl);
 vuint32m2_t vt_2 = __riscv_vsll_vv_u32m2(__riscv_vmv_v_x_u32m2(1, vl), vt_1, vl);
 
@@ -4757,7 +4757,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
 
 vl = 16;
 
-// [… text of the removed comment was lost in extraction]
+// retrieve lane to multiply with scale
 vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
 vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
 vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);