RubyGems - llama_cpp - Versions diffs - 0.5.1 → 0.5.2 - Mend

llama_cpp 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18):

checksums.yaml +4 -4
data/CHANGELOG.md +11 -3
data/examples/prompt_jp.txt +1 -1
data/ext/llama_cpp/extconf.rb +1 -1
data/ext/llama_cpp/llama_cpp.cpp +30 -0
data/ext/llama_cpp/src/ggml-alloc.c +0 -5
data/ext/llama_cpp/src/ggml-cuda.cu +1011 -655
data/ext/llama_cpp/src/ggml-metal.m +57 -15
data/ext/llama_cpp/src/ggml-metal.metal +271 -137
data/ext/llama_cpp/src/ggml.c +7 -3
data/ext/llama_cpp/src/ggml.h +1 -1
data/ext/llama_cpp/src/k_quants.c +4 -1
data/ext/llama_cpp/src/llama.cpp +617 -141
data/ext/llama_cpp/src/llama.h +8 -6
data/lib/llama_cpp/version.rb +2 -2
data/lib/llama_cpp.rb +1 -1
data/sig/llama_cpp.rbs +4 -0
metadata +2 -2

data/ext/llama_cpp/src/ggml.h (changed)

@@ -270,7 +270,7 @@ extern "C" {
 #if defined(__ARM_NEON) && defined(__CUDACC__)
     typedef half ggml_fp16_t;
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON) && !defined(_MSC_VER)
     typedef __fp16 ggml_fp16_t;
 #else
     typedef uint16_t ggml_fp16_t;

data/ext/llama_cpp/src/k_quants.c (changed)

@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
         memcpy(utmp, x[i].scales, 12);
-        const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
+        uint32x2_t mins8 = { 0 };
+        mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
+        mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
         utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
         utmp[0] &= kmask1;