llama_cpp 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -3
- data/examples/prompt_jp.txt +1 -1
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +30 -0
- data/ext/llama_cpp/src/ggml-alloc.c +0 -5
- data/ext/llama_cpp/src/ggml-cuda.cu +1011 -655
- data/ext/llama_cpp/src/ggml-metal.m +57 -15
- data/ext/llama_cpp/src/ggml-metal.metal +271 -137
- data/ext/llama_cpp/src/ggml.c +7 -3
- data/ext/llama_cpp/src/ggml.h +1 -1
- data/ext/llama_cpp/src/k_quants.c +4 -1
- data/ext/llama_cpp/src/llama.cpp +617 -141
- data/ext/llama_cpp/src/llama.h +8 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +4 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -270,7 +270,7 @@ extern "C" {
 
 #if defined(__ARM_NEON) && defined(__CUDACC__)
 typedef half ggml_fp16_t;
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && !defined(_MSC_VER)
 typedef __fp16 ggml_fp16_t;
 #else
 typedef uint16_t ggml_fp16_t;
data/ext/llama_cpp/src/k_quants.c
CHANGED
@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
 
 memcpy(utmp, x[i].scales, 12);
 
-
+uint32x2_t mins8 = { 0 };
+mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
+mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
+
 utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
 utmp[0] &= kmask1;
 