llama_cpp 0.10.2 → 0.10.3
This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/llama_cpp/src/ggml-alloc.c +1 -1
- data/ext/llama_cpp/src/ggml-backend.c +6 -10
- data/ext/llama_cpp/src/ggml-cuda.cu +510 -372
- data/ext/llama_cpp/src/ggml-quants.c +25 -344
- data/ext/llama_cpp/src/ggml.c +7 -8
- data/ext/llama_cpp/src/ggml.h +2 -0
- data/ext/llama_cpp/src/llama.cpp +432 -39
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +1 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
```diff
@@ -4041,7 +4041,6 @@ static struct ggml_tensor * ggml_group_norm_impl(
     result->op = GGML_OP_GROUP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
```
```diff
@@ -5541,7 +5540,6 @@ static struct ggml_tensor * ggml_upscale_impl(
     result->op_params[0] = scale_factor;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
```
```diff
@@ -5846,7 +5844,6 @@ struct ggml_tensor * ggml_get_rel_pos(
     result->op = GGML_OP_GET_REL_POS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
```
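The three hunks above all delete the same kind of line: an explicit `result->src[1] = NULL;` after the tensor is constructed. ggml's tensor constructor already leaves every `src` slot `NULL` (the struct is initialized with the `src` array zeroed), which is presumably why these stores could be dropped. A minimal standalone sketch of that assumption; the struct below is illustrative, not ggml's real `ggml_tensor` layout:

```c
/* Sketch: a zero-initialized pointer array, as a tensor constructor that
 * value-initializes its struct would produce. Illustrative only. */
#include <stdio.h>

#define MAX_SRC 10

struct tensor_like {
    void * src[MAX_SRC];
};

int main(void) {
    struct tensor_like t = {0};  /* every src[k] starts out NULL */
    printf("src[1] == NULL: %d\n", t.src[1] == NULL);  /* prints 1 */
    return 0;
}
```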
```diff
@@ -10335,7 +10332,8 @@ static void ggml_compute_forward_scale_f32(
     }
 
     // scale factor
-    const float v = *(float *) dst->op_params;
+    float v;
+    memcpy(&v, dst->op_params, sizeof(float));
 
     const int ith = params->ith;
     const int nth = params->nth;
```
```diff
@@ -15152,7 +15150,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-                    const float s = *(float *) tensor->op_params;
+                    float s;
+                    memcpy(&s, tensor->op_params, sizeof(float));
 
                     src0->grad =
                         ggml_add_or_set(ctx,
```
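Both scale hunks replace a direct pointer-cast read of a float out of `op_params` (an `int32_t` array) with a `memcpy`. Reading through a cast like `*(float *) dst->op_params` type-puns through an incompatible pointer, which is undefined behavior under strict aliasing and can also fault as an unaligned access on strict targets; copying the bytes is the well-defined alternative. A small self-contained sketch of the pattern (buffer name and size are illustrative):

```c
/* Sketch: storing and loading a float through an int32_t parameter buffer
 * with memcpy instead of a pointer cast, as the patched code does. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    int32_t op_params[16] = {0};

    float scale = 0.125f;
    memcpy(op_params, &scale, sizeof(float));  /* store the scale factor */

    float v;
    memcpy(&v, op_params, sizeof(float));      /* load it back, no type punning */
    printf("%f\n", v);                         /* prints 0.125000 */
    return 0;
}
```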
```diff
@@ -17454,9 +17453,9 @@ static void ggml_opt_acc_grad(int np, struct ggml_tensor * const ps[], float * g
 }
 
 //
-//
+// Using AdamW - ref: https://arxiv.org/pdf/1711.05101v3.pdf
 //
-//
+// (Original Adam - ref: https://arxiv.org/pdf/1412.6980.pdf)
 //
 
 static enum ggml_opt_result ggml_opt_adam(
```
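The updated comment points at the AdamW paper rather than the original Adam paper. The practical difference is decoupled weight decay: AdamW subtracts the decay term directly from the parameter instead of folding it into the gradient as Adam with L2 regularization does. A sketch of one AdamW step for a single scalar parameter; the names and hyperparameter values are illustrative, not ggml's:

```c
/* Sketch of one AdamW update following arXiv:1711.05101. */
#include <math.h>
#include <stdio.h>

int main(void) {
    float x = 1.0f;           /* parameter */
    float g = 0.5f;           /* gradient at x */
    float m = 0.0f, v = 0.0f; /* first/second moment estimates */

    const float alpha = 1e-3f, beta1 = 0.9f, beta2 = 0.999f;
    const float eps = 1e-8f, wd = 0.01f;
    const int t = 1;          /* step number, for bias correction */

    m = beta1 * m + (1.0f - beta1) * g;
    v = beta2 * v + (1.0f - beta2) * g * g;
    const float mh = m / (1.0f - powf(beta1, (float) t));
    const float vh = v / (1.0f - powf(beta2, (float) t));

    /* decoupled weight decay: the "W" in AdamW is the wd * x term,
     * applied to the parameter rather than mixed into the gradient */
    x = x - alpha * (mh / (sqrtf(vh) + eps) + wd * x);

    printf("x after one step: %f\n", x);
    return 0;
}
```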
```diff
@@ -19349,7 +19348,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
                 data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
             }
             gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-            free(data);
+            free((void *)data);
         } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
             GGML_ASSERT(false && "nested arrays not supported");
         } else {
```
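The only change in this hunk is the explicit `(void *)` cast on `free`. Here `data` is a temporary array of pointers to const strings, and the cast makes the const-qualifier drop explicit, presumably to keep pedantic compilers quiet. The pattern in isolation:

```c
/* Sketch of the pattern: a heap-allocated array of pointers to string
 * constants, freed through an explicit (void *) cast as in gguf_set_kv. */
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    const size_t n = 3;
    const char ** data = malloc(n * sizeof(*data));
    if (data == NULL) {
        return 1;
    }

    data[0] = "alpha";
    data[1] = "beta";
    data[2] = "gamma";
    for (size_t j = 0; j < n; j++) {
        printf("%s\n", data[j]);
    }

    free((void *) data);  /* cast makes dropping the const qualifier explicit */
    return 0;
}
```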
data/ext/llama_cpp/src/ggml.h
CHANGED
```diff
@@ -255,6 +255,8 @@
 #define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
 #elif defined(__GNUC__)
 #define GGML_UNREACHABLE() __builtin_unreachable()
+#elif defined(_MSC_VER)
+#define GGML_UNREACHABLE() __assume(0)
 #else
 #define GGML_UNREACHABLE() ((void) 0)
 #endif
```
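The new branch gives MSVC its own unreachable hint, `__assume(0)`, instead of falling through to the no-op default. A sketch of how such a macro is typically used: marking a `switch` default that is impossible by construction so the optimizer can drop the dead path. `MY_UNREACHABLE` and `sign_name` are illustrative stand-ins, not ggml's API:

```c
/* Compiler-specific "this point is unreachable" hint, mirroring the
 * GGML_UNREACHABLE definition chain added in the hunk above. */
#if defined(__GNUC__)
#define MY_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define MY_UNREACHABLE() __assume(0)
#else
#define MY_UNREACHABLE() ((void) 0)
#endif

#include <stdio.h>

/* s is guaranteed by the caller to be -1, 0, or 1 */
static const char * sign_name(int s) {
    switch (s) {
        case -1: return "negative";
        case  0: return "zero";
        case  1: return "positive";
        default: MY_UNREACHABLE();  /* tells the optimizer this can't happen */
    }
    return "";  /* only reachable with the ((void) 0) fallback definition */
}

int main(void) {
    printf("%s\n", sign_name(1));
    return 0;
}
```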