llama_cpp 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -3
- data/examples/prompt_jp.txt +1 -1
- data/ext/llama_cpp/extconf.rb +1 -1
- data/ext/llama_cpp/llama_cpp.cpp +30 -0
- data/ext/llama_cpp/src/ggml-alloc.c +0 -5
- data/ext/llama_cpp/src/ggml-cuda.cu +1011 -655
- data/ext/llama_cpp/src/ggml-metal.m +57 -15
- data/ext/llama_cpp/src/ggml-metal.metal +271 -137
- data/ext/llama_cpp/src/ggml.c +7 -3
- data/ext/llama_cpp/src/ggml.h +1 -1
- data/ext/llama_cpp/src/k_quants.c +4 -1
- data/ext/llama_cpp/src/llama.cpp +617 -141
- data/ext/llama_cpp/src/llama.h +8 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -1
- data/sig/llama_cpp.rbs +4 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -270,7 +270,7 @@ extern "C" {
|
|
270
270
|
|
271
271
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
272
272
|
typedef half ggml_fp16_t;
|
273
|
-
#elif defined(__ARM_NEON)
|
273
|
+
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
|
274
274
|
typedef __fp16 ggml_fp16_t;
|
275
275
|
#else
|
276
276
|
typedef uint16_t ggml_fp16_t;
|
data/ext/llama_cpp/src/k_quants.c
CHANGED
@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|
2609
2609
|
|
2610
2610
|
memcpy(utmp, x[i].scales, 12);
|
2611
2611
|
|
2612
|
-
|
2612
|
+
uint32x2_t mins8 = { 0 };
|
2613
|
+
mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
|
2614
|
+
mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
|
2615
|
+
|
2613
2616
|
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
2614
2617
|
utmp[0] &= kmask1;
|
2615
2618
|
|