llama_cpp 0.10.2 → 0.10.3
This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/llama_cpp/src/ggml-alloc.c +1 -1
- data/ext/llama_cpp/src/ggml-backend.c +6 -10
- data/ext/llama_cpp/src/ggml-cuda.cu +510 -372
- data/ext/llama_cpp/src/ggml-quants.c +25 -344
- data/ext/llama_cpp/src/ggml.c +7 -8
- data/ext/llama_cpp/src/ggml.h +2 -0
- data/ext/llama_cpp/src/llama.cpp +432 -39
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +1 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
```diff
@@ -4041,7 +4041,6 @@ static struct ggml_tensor * ggml_group_norm_impl(
     result->op = GGML_OP_GROUP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
```
```diff
@@ -5541,7 +5540,6 @@ static struct ggml_tensor * ggml_upscale_impl(
     result->op_params[0] = scale_factor;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
```
```diff
@@ -5846,7 +5844,6 @@ struct ggml_tensor * ggml_get_rel_pos(
     result->op = GGML_OP_GET_REL_POS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
```
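The three hunks above all delete the same kind of line: an explicit `result->src[1] = NULL;` after the tensor is constructed. ggml's tensor constructor already leaves every `src` slot `NULL` (the struct is initialized with the `src` array zeroed), which is presumably why these stores could be dropped. A minimal standalone sketch of that assumption; the struct below is illustrative, not ggml's real `ggml_tensor` layout:

```c
/* Sketch: a zero-initialized pointer array, as a tensor constructor that
 * value-initializes its struct would produce. Illustrative only. */
#include <stdio.h>

#define MAX_SRC 10

struct tensor_like {
    void * src[MAX_SRC];
};

int main(void) {
    struct tensor_like t = {0};  /* every src[k] starts out NULL */
    printf("src[1] == NULL: %d\n", t.src[1] == NULL);  /* prints 1 */
    return 0;
}
```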
```diff
@@ -10335,7 +10332,8 @@ static void ggml_compute_forward_scale_f32(
     }
 
     // scale factor
-    const float v = *(float *) dst->op_params;
+    float v;
+    memcpy(&v, dst->op_params, sizeof(float));
 
     const int ith = params->ith;
     const int nth = params->nth;
```
```diff
@@ -15152,7 +15150,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-                    const float s = *(float *) tensor->op_params;
+                    float s;
+                    memcpy(&s, tensor->op_params, sizeof(float));
 
                     src0->grad =
                         ggml_add_or_set(ctx,
```
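Both scale hunks replace a direct pointer-cast read of a float out of `op_params` (an `int32_t` array) with a `memcpy`. Reading through a cast like `*(float *) dst->op_params` type-puns through an incompatible pointer, which is undefined behavior under strict aliasing and can also fault as an unaligned access on strict targets; copying the bytes is the well-defined alternative. A small self-contained sketch of the pattern (buffer name and size are illustrative):

```c
/* Sketch: storing and loading a float through an int32_t parameter buffer
 * with memcpy instead of a pointer cast, as the patched code does. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    int32_t op_params[16] = {0};

    float scale = 0.125f;
    memcpy(op_params, &scale, sizeof(float));  /* store the scale factor */

    float v;
    memcpy(&v, op_params, sizeof(float));      /* load it back, no type punning */
    printf("%f\n", v);                         /* prints 0.125000 */
    return 0;
}
```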
```diff
@@ -17454,9 +17453,9 @@ static void ggml_opt_acc_grad(int np, struct ggml_tensor * const ps[], float * g
 }
 
 //
-//
+// Using AdamW - ref: https://arxiv.org/pdf/1711.05101v3.pdf
 //
-//
+// (Original Adam - ref: https://arxiv.org/pdf/1412.6980.pdf)
 //
 
 static enum ggml_opt_result ggml_opt_adam(
```
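The updated comment points at the AdamW paper rather than the original Adam paper. The practical difference is decoupled weight decay: AdamW subtracts the decay term directly from the parameter instead of folding it into the gradient as Adam with L2 regularization does. A sketch of one AdamW step for a single scalar parameter; the names and hyperparameter values are illustrative, not ggml's:

```c
/* Sketch of one AdamW update following arXiv:1711.05101. */
#include <math.h>
#include <stdio.h>

int main(void) {
    float x = 1.0f;           /* parameter */
    float g = 0.5f;           /* gradient at x */
    float m = 0.0f, v = 0.0f; /* first/second moment estimates */

    const float alpha = 1e-3f, beta1 = 0.9f, beta2 = 0.999f;
    const float eps = 1e-8f, wd = 0.01f;
    const int t = 1;          /* step number, for bias correction */

    m = beta1 * m + (1.0f - beta1) * g;
    v = beta2 * v + (1.0f - beta2) * g * g;
    const float mh = m / (1.0f - powf(beta1, (float) t));
    const float vh = v / (1.0f - powf(beta2, (float) t));

    /* decoupled weight decay: the "W" in AdamW is the wd * x term,
     * applied to the parameter rather than mixed into the gradient */
    x = x - alpha * (mh / (sqrtf(vh) + eps) + wd * x);

    printf("x after one step: %f\n", x);
    return 0;
}
```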
```diff
@@ -19349,7 +19348,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
                 data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
             }
             gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-            free(data);
+            free((void *)data);
         } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
             GGML_ASSERT(false && "nested arrays not supported");
         } else {
```
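The only change in this hunk is the explicit `(void *)` cast on `free`. Here `data` is a temporary array of pointers to const strings, and the cast makes the const-qualifier drop explicit, presumably to keep pedantic compilers quiet. The pattern in isolation:

```c
/* Sketch of the pattern: a heap-allocated array of pointers to string
 * constants, freed through an explicit (void *) cast as in gguf_set_kv. */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    const size_t n = 3;
    const char ** data = malloc(n * sizeof(*data));
    if (data == NULL) {
        return 1;
    }

    data[0] = "alpha";
    data[1] = "beta";
    data[2] = "gamma";
    for (size_t j = 0; j < n; j++) {
        printf("%s\n", data[j]);
    }

    free((void *) data);  /* cast makes dropping the const qualifier explicit */
    return 0;
}
```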
data/ext/llama_cpp/src/ggml.h
CHANGED
```diff
@@ -255,6 +255,8 @@
 #define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
 #elif defined(__GNUC__)
 #define GGML_UNREACHABLE() __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define GGML_UNREACHABLE() __assume(0)
 #else
 #define GGML_UNREACHABLE() ((void) 0)
 #endif
```
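The new branch gives MSVC its own unreachable hint, `__assume(0)`, instead of falling through to the no-op default. A sketch of how such a macro is typically used: marking a `switch` default that is impossible by construction so the optimizer can drop the dead path. `MY_UNREACHABLE` and `sign_name` are illustrative stand-ins, not ggml's API:

```c
/* Compiler-specific "this point is unreachable" hint, mirroring the
 * GGML_UNREACHABLE definition chain added in the hunk above. */
#if defined(__GNUC__)
#define MY_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define MY_UNREACHABLE() __assume(0)
#else
#define MY_UNREACHABLE() ((void) 0)
#endif

#include <stdio.h>

/* s is guaranteed by the caller to be -1, 0, or 1 */
static const char * sign_name(int s) {
    switch (s) {
        case -1: return "negative";
        case  0: return "zero";
        case  1: return "positive";
        default: MY_UNREACHABLE();  /* tells the optimizer this can't happen */
    }
    return "";  /* only reachable with the ((void) 0) fallback definition */
}

int main(void) {
    printf("%s\n", sign_name(1));
    return 0;
}
```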