llama_cpp 0.15.1 → 0.15.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/vendor/tmp/llama.cpp/Makefile +3 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +2 -3
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +15 -7
- data/vendor/tmp/llama.cpp/ggml-impl.h +7 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
- data/vendor/tmp/llama.cpp/ggml-metal.m +114 -125
- data/vendor/tmp/llama.cpp/ggml-metal.metal +86 -109
- data/vendor/tmp/llama.cpp/ggml-quants.c +2202 -28
- data/vendor/tmp/llama.cpp/ggml-rpc.cpp +1032 -0
- data/vendor/tmp/llama.cpp/ggml-rpc.h +24 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +24 -143
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +4 -2
- data/vendor/tmp/llama.cpp/ggml.c +726 -646
- data/vendor/tmp/llama.cpp/ggml.h +28 -17
- data/vendor/tmp/llama.cpp/llama.cpp +478 -281
- data/vendor/tmp/llama.cpp/llama.h +3 -0
- data/vendor/tmp/llama.cpp/unicode-data.cpp +6969 -2169
- data/vendor/tmp/llama.cpp/unicode-data.h +15 -12
- data/vendor/tmp/llama.cpp/unicode.cpp +89 -111
- data/vendor/tmp/llama.cpp/unicode.h +44 -12
- metadata +4 -2
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
```diff
@@ -468,7 +468,6 @@ extern "C" {
         GGML_OP_SOFT_MAX_BACK,
         GGML_OP_ROPE,
         GGML_OP_ROPE_BACK,
-        GGML_OP_ALIBI,
         GGML_OP_CLAMP,
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
```
```diff
@@ -520,6 +519,7 @@ extern "C" {
         GGML_UNARY_OP_TANH,
         GGML_UNARY_OP_ELU,
         GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_SIGMOID,
         GGML_UNARY_OP_GELU,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
```
```diff
@@ -565,7 +565,8 @@ extern "C" {
     // n-dimensional tensor
     struct ggml_tensor {
         enum ggml_type type;
-        enum ggml_backend_type backend;
+
+        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
         struct ggml_backend_buffer * buffer;
 
```
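For downstream code this means reads of `tensor->backend` now hit a deprecated field. A minimal sketch of the replacement pattern, assuming the vendored `ggml-backend.h` is available (the helper name and its use of `ggml_backend_buffer_name` are our illustration, not part of this diff):

```c
#include "ggml.h"
#include "ggml-backend.h"

#include <stdio.h>

// Illustrative helper: report where a tensor is stored by looking at its
// buffer rather than the deprecated `backend` field.
static void print_tensor_location(const struct ggml_tensor * t) {
    if (t->buffer != NULL) {
        // the buffer name identifies the backend that allocated it (e.g. "CPU")
        printf("%s lives in buffer: %s\n", t->name, ggml_backend_buffer_name(t->buffer));
    } else {
        printf("%s has no backend buffer (data: %p)\n", t->name, t->data);
    }
}
```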
```diff
@@ -766,7 +767,8 @@ extern "C" {
     GGML_API bool ggml_is_3d  (const struct ggml_tensor * tensor);
     GGML_API int  ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
 
-    GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+    GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+    GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
 
     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);
```
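`ggml_are_same_shape` is realigned and gains a sibling, `ggml_are_same_stride`, which also compares byte strides. A hedged sketch of what the pair distinguishes (the helper name is ours):

```c
#include "ggml.h"

#include <stdbool.h>

// Illustrative helper: two tensors have an identical memory layout only if
// they agree on type, logical shape, and byte strides; a permuted view has
// the same shape as its contiguous copy, but different strides.
static bool has_same_layout(const struct ggml_tensor * t0,
                            const struct ggml_tensor * t1) {
    return t0->type == t1->type         &&
           ggml_are_same_shape (t0, t1) &&
           ggml_are_same_stride(t0, t1);
}
```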
```diff
@@ -1074,6 +1076,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_sigmoid(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_gelu(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
```
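The new sigmoid pair mirrors the other unary ops (`ggml_gelu`, `ggml_silu`, ...). A minimal end-to-end sketch, assuming the vendored `ggml.h`; the buffer size and thread count are arbitrary:

```c
#include "ggml.h"

#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024, // plenty for a toy graph
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // y = sigmoid(x) for a small F32 vector
    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * y = ggml_sigmoid(ctx, x);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);

    float * xd = (float *) x->data;
    xd[0] = -2.0f; xd[1] = -1.0f; xd[2] = 0.0f; xd[3] = 1.0f;

    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    const float * yd = (const float *) y->data;
    for (int i = 0; i < 4; ++i) {
        printf("sigmoid(%+.1f) = %.4f\n", xd[i], yd[i]);
    }

    ggml_free(ctx);
    return 0;
}
```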
```diff
@@ -1428,15 +1438,13 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
-    // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
+    // fused soft_max(a*scale + mask*(ALiBi slope))
     // mask is optional
-    // pos is required when max_bias > 0.0f
     // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * mask,
-            struct ggml_tensor  * pos,
             float                 scale,
             float                 max_bias);
 
```
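This is a breaking signature change: the `pos` tensor argument is removed and the ALiBi slope is instead derived from `max_bias` and applied to `mask` inside the fused op. A hedged migration sketch (the tensor names and scale value are placeholders, not from the gem):

```c
#include "ggml.h"

// Hypothetical attention snippet; kq and kq_mask come from the surrounding graph.
static struct ggml_tensor * masked_softmax(struct ggml_context * ctx,
                                           struct ggml_tensor  * kq,
                                           struct ggml_tensor  * kq_mask) {
    const float kq_scale = 1.0f/8.0f; // e.g. 1/sqrt(head_dim) for head_dim = 64
    const float max_bias = 0.0f;      // 0.0f disables ALiBi

    // 0.15.1: return ggml_soft_max_ext(ctx, kq, kq_mask, kq_pos, kq_scale, max_bias);
    return ggml_soft_max_ext(ctx, kq, kq_mask, kq_scale, max_bias);
}
```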
```diff
@@ -1538,16 +1546,6 @@ extern "C" {
             float                 xpos_base,
             bool                  xpos_down);
 
-    // alibi position embedding
-    // in-place, returns view(a)
-    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            int                   n_past,
-            int                   n_head,
-            float                 bias_max),
-        "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
-
     // clamp
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_clamp(
```
```diff
@@ -1677,12 +1675,24 @@ extern "C" {
             float                 p1);
 
     // nearest interpolate
+    // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
     GGML_API struct ggml_tensor * ggml_upscale(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   scale_factor);
 
+    // nearest interpolate
+    // nearest interpolate to specified dimensions
+    // used in tortoise.cpp
+    GGML_API struct ggml_tensor * ggml_upscale_ext(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   ne0,
+            int                   ne1,
+            int                   ne2,
+            int                   ne3);
+
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
             struct ggml_context * ctx,
```
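`ggml_upscale_ext` generalizes `ggml_upscale` from a uniform scale factor to explicit output dimensions. An illustrative wrapper (the function name is ours) showing the relationship:

```c
#include "ggml.h"

// Illustrative wrapper: nearest-neighbour resize of a [W, H, C, N] tensor to an
// explicit width/height, leaving the channel and batch dimensions untouched.
static struct ggml_tensor * resize_nearest(struct ggml_context * ctx,
                                           struct ggml_tensor  * a,
                                           int                   out_w,
                                           int                   out_h) {
    // ggml_upscale(ctx, a, s) is now the special case
    // ggml_upscale_ext(ctx, a, a->ne[0]*s, a->ne[1]*s, a->ne[2], a->ne[3])
    return ggml_upscale_ext(ctx, a, out_w, out_h, (int) a->ne[2], (int) a->ne[3]);
}
```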
```diff
@@ -1744,7 +1754,8 @@ extern "C" {
             struct ggml_tensor  * k,
             struct ggml_tensor  * v,
             struct ggml_tensor  * mask,
-            float                 scale);
+            float                 scale,
+            float                 max_bias);
 
     GGML_API void ggml_flash_attn_ext_set_prec(
             struct ggml_tensor * a,
```