llama_cpp 0.15.1 → 0.15.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/vendor/tmp/llama.cpp/Makefile +3 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +2 -3
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +15 -7
- data/vendor/tmp/llama.cpp/ggml-impl.h +7 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
- data/vendor/tmp/llama.cpp/ggml-metal.m +114 -125
- data/vendor/tmp/llama.cpp/ggml-metal.metal +86 -109
- data/vendor/tmp/llama.cpp/ggml-quants.c +2202 -28
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +1032 -0
- data/vendor/tmp/llama.cpp/ggml-rpc.h +24 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +24 -143
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +4 -2
- data/vendor/tmp/llama.cpp/ggml.c +726 -646
- data/vendor/tmp/llama.cpp/ggml.h +28 -17
- data/vendor/tmp/llama.cpp/llama.cpp +478 -281
- data/vendor/tmp/llama.cpp/llama.h +3 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +6969 -2169
- data/vendor/tmp/llama.cpp/unicode-data.h +15 -12
- data/vendor/tmp/llama.cpp/unicode.cpp +89 -111
- data/vendor/tmp/llama.cpp/unicode.h +44 -12
- metadata +4 -2
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -468,7 +468,6 @@ extern "C" {
         GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
-        GGML_OP_ALIBI,
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
@@ -520,6 +519,7 @@ extern "C" {
         GGML_UNARY_OP_TANH,
         GGML_UNARY_OP_ELU,
         GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_SIGMOID,
         GGML_UNARY_OP_GELU,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
@@ -565,7 +565,8 @@ extern "C" {
     // n-dimensional tensor
     struct ggml_tensor {
         enum ggml_type type;
-        enum ggml_backend_type backend;
+
+        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
         struct ggml_backend_buffer * buffer;
 
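
Note: code that previously read the `backend` member should switch to the tensor's attached buffer. A minimal sketch of one way to do that, assuming `ggml-backend.h` with its `ggml_backend_buffer_name` and `ggml_backend_buffer_is_host` helpers is available; the helper function below is hypothetical, not part of this release.

```c
#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

// Hypothetical helper: report where a tensor is stored by looking at the
// buffer it was allocated in, instead of the now-deprecated `backend` field.
static void print_tensor_location(const struct ggml_tensor * t) {
    if (t->buffer == NULL) {
        printf("%s: not allocated in a backend buffer\n", t->name);
        return;
    }
    printf("%s: buffer=%s, host=%s\n", t->name,
           ggml_backend_buffer_name(t->buffer),
           ggml_backend_buffer_is_host(t->buffer) ? "yes" : "no");
}
```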
@@ -766,7 +767,8 @@ extern "C" {
     GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
     GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
 
-    GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+    GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+    GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
 
     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
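
The new `ggml_are_same_stride` complements `ggml_are_same_shape` by comparing byte strides rather than extents. A minimal sketch, assuming a non-contiguous view as the second operand (the tensor sizes and context size are arbitrary):

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // a is contiguous; b is a view of a wider tensor with the same shape
    // as a but larger row strides.
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 8);
    struct ggml_tensor * c = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 8);
    struct ggml_tensor * b = ggml_view_2d(ctx, c, 4, 8, c->nb[1], 0);

    printf("same shape:  %d\n", ggml_are_same_shape (a, b)); // 1
    printf("same stride: %d\n", ggml_are_same_stride(a, b)); // 0

    ggml_free(ctx);
    return 0;
}
```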
@@ -1074,6 +1076,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
+    GGML_API struct ggml_tensor * ggml_sigmoid(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
     GGML_API struct ggml_tensor * ggml_gelu(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
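
A minimal CPU sketch exercising the new `ggml_sigmoid` through the graph API; the context size, tensor size, and input values are arbitrary choices for illustration:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // build y = sigmoid(x) for a small 1-D tensor
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * y = ggml_sigmoid(ctx, x);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);

    for (int i = 0; i < 4; ++i) {
        ggml_set_f32_1d(x, i, (float) i - 1.5f); // -1.5, -0.5, 0.5, 1.5
    }
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    for (int i = 0; i < 4; ++i) {
        printf("sigmoid(%+.1f) = %.4f\n", ggml_get_f32_1d(x, i), ggml_get_f32_1d(y, i));
    }

    ggml_free(ctx);
    return 0;
}
```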
@@ -1428,15 +1438,13 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
-    // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
+    // fused soft_max(a*scale + mask*(ALiBi slope))
     // mask is optional
-    // pos is required when max_bias > 0.0f
     // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * mask,
-            struct ggml_tensor * pos,
             float scale,
             float max_bias);
 
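
Callers of `ggml_soft_max_ext` no longer pass a `pos` tensor; the ALiBi slopes are derived from `max_bias` and applied to `mask` inside the op. A hedged sketch of the updated call with ALiBi disabled and no mask, the simplest valid form (the wrapper name and the `1/sqrt(d_head)` scale are illustrative):

```c
#include <math.h>
#include "ggml.h"

// Scaled softmax over a KQ score tensor using the new signature.
// mask may be NULL when max_bias == 0.0f (no ALiBi).
static struct ggml_tensor * scaled_softmax(struct ggml_context * ctx,
                                           struct ggml_tensor  * kq,
                                           int                   d_head) {
    const float scale    = 1.0f / sqrtf((float) d_head);
    const float max_bias = 0.0f;
    return ggml_soft_max_ext(ctx, kq, /*mask =*/ NULL, scale, max_bias);
}
```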
@@ -1538,16 +1546,6 @@ extern "C" {
             float xpos_base,
             bool xpos_down);
 
-    // alibi position embedding
-    // in-place, returns view(a)
-    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
-            struct ggml_context * ctx,
-            struct ggml_tensor * a,
-            int n_past,
-            int n_head,
-            float bias_max),
-            "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
-
     // clamp
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_clamp(
@@ -1677,12 +1675,24 @@ extern "C" {
             float p1);
 
     // nearest interpolate
+    // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_upscale(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int scale_factor);
 
+    // nearest interpolate
+    // nearest interpolate to specified dimensions
+    // used in tortoise.cpp
+    GGML_API struct ggml_tensor * ggml_upscale_ext(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int ne0,
+            int ne1,
+            int ne2,
+            int ne3);
+
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
             struct ggml_context * ctx,
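
`ggml_upscale_ext` interpolates to explicit target sizes rather than a single scale factor. A sketch of one possible wrapper (the helper name and the 2x targets are illustrative; the target sizes are assumed to be at least the source sizes):

```c
#include "ggml.h"

// Nearest-neighbour upscale of a [W, H, C, N] tensor to explicit targets:
// double the width and height, keep channels and batch unchanged.
static struct ggml_tensor * upscale_2x(struct ggml_context * ctx,
                                       struct ggml_tensor  * img) {
    return ggml_upscale_ext(ctx, img,
                            (int) img->ne[0] * 2, // ne0: width
                            (int) img->ne[1] * 2, // ne1: height
                            (int) img->ne[2],     // ne2: channels
                            (int) img->ne[3]);    // ne3: batch
}
```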
@@ -1744,7 +1754,8 @@ extern "C" {
             struct ggml_tensor * k,
             struct ggml_tensor * v,
             struct ggml_tensor * mask,
-            float scale);
+            float scale,
+            float max_bias);
 
     GGML_API void ggml_flash_attn_ext_set_prec(
             struct ggml_tensor * a,
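
`ggml_flash_attn_ext` gains a trailing `max_bias` after `scale`. A hedged sketch of the updated call (the wrapper name is illustrative; `max_bias = 0.0f` keeps the previous no-ALiBi behaviour, while a positive value enables ALiBi and then a KQ mask is expected):

```c
#include <math.h>
#include "ggml.h"

// Flash-attention node with the signature shipped in 0.15.2; scale is the
// usual 1/sqrt(d_head), and ALiBi is left disabled here.
static struct ggml_tensor * attn(struct ggml_context * ctx,
                                 struct ggml_tensor  * q,
                                 struct ggml_tensor  * k,
                                 struct ggml_tensor  * v,
                                 struct ggml_tensor  * kq_mask,
                                 int                   d_head) {
    const float scale = 1.0f / sqrtf((float) d_head);
    return ggml_flash_attn_ext(ctx, q, k, v, kq_mask, scale, /*max_bias =*/ 0.0f);
}
```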