llama_cpp 0.15.1 → 0.15.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -468,7 +468,6 @@ extern "C" {
468
468
  GGML_OP_SOFT_MAX_BACK,
469
469
  GGML_OP_ROPE,
470
470
  GGML_OP_ROPE_BACK,
471
- GGML_OP_ALIBI,
472
471
  GGML_OP_CLAMP,
473
472
  GGML_OP_CONV_TRANSPOSE_1D,
474
473
  GGML_OP_IM2COL,
@@ -520,6 +519,7 @@ extern "C" {
520
519
  GGML_UNARY_OP_TANH,
521
520
  GGML_UNARY_OP_ELU,
522
521
  GGML_UNARY_OP_RELU,
522
+ GGML_UNARY_OP_SIGMOID,
523
523
  GGML_UNARY_OP_GELU,
524
524
  GGML_UNARY_OP_GELU_QUICK,
525
525
  GGML_UNARY_OP_SILU,
@@ -565,7 +565,8 @@ extern "C" {
565
565
  // n-dimensional tensor
566
566
  struct ggml_tensor {
567
567
  enum ggml_type type;
568
- enum ggml_backend_type backend;
568
+
569
+ GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
569
570
 
570
571
  struct ggml_backend_buffer * buffer;
571
572
 
@@ -766,7 +767,8 @@ extern "C" {
766
767
  GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
767
768
  GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
768
769
 
769
- GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
770
+ GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
771
+ GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
770
772
 
771
773
  // use this to compute the memory overhead of a tensor
772
774
  GGML_API size_t ggml_tensor_overhead(void);
@@ -1074,6 +1076,14 @@ extern "C" {
1074
1076
  struct ggml_context * ctx,
1075
1077
  struct ggml_tensor * a);
1076
1078
 
1079
+ GGML_API struct ggml_tensor * ggml_sigmoid(
1080
+ struct ggml_context * ctx,
1081
+ struct ggml_tensor * a);
1082
+
1083
+ GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
1084
+ struct ggml_context * ctx,
1085
+ struct ggml_tensor * a);
1086
+
1077
1087
  GGML_API struct ggml_tensor * ggml_gelu(
1078
1088
  struct ggml_context * ctx,
1079
1089
  struct ggml_tensor * a);
@@ -1428,15 +1438,13 @@ extern "C" {
1428
1438
  struct ggml_context * ctx,
1429
1439
  struct ggml_tensor * a);
1430
1440
 
1431
- // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
1441
+ // fused soft_max(a*scale + mask*(ALiBi slope))
1432
1442
  // mask is optional
1433
- // pos is required when max_bias > 0.0f
1434
1443
  // max_bias = 0.0f for no ALiBi
1435
1444
  GGML_API struct ggml_tensor * ggml_soft_max_ext(
1436
1445
  struct ggml_context * ctx,
1437
1446
  struct ggml_tensor * a,
1438
1447
  struct ggml_tensor * mask,
1439
- struct ggml_tensor * pos,
1440
1448
  float scale,
1441
1449
  float max_bias);
1442
1450
 
@@ -1538,16 +1546,6 @@ extern "C" {
1538
1546
  float xpos_base,
1539
1547
  bool xpos_down);
1540
1548
 
1541
- // alibi position embedding
1542
- // in-place, returns view(a)
1543
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
1544
- struct ggml_context * ctx,
1545
- struct ggml_tensor * a,
1546
- int n_past,
1547
- int n_head,
1548
- float bias_max),
1549
- "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
1550
-
1551
1549
  // clamp
1552
1550
  // in-place, returns view(a)
1553
1551
  GGML_API struct ggml_tensor * ggml_clamp(
@@ -1677,12 +1675,24 @@ extern "C" {
1677
1675
  float p1);
1678
1676
 
1679
1677
  // nearest interpolate
1678
+ // multiplies ne0 and ne1 by scale factor
1680
1679
  // used in stable-diffusion
1681
1680
  GGML_API struct ggml_tensor * ggml_upscale(
1682
1681
  struct ggml_context * ctx,
1683
1682
  struct ggml_tensor * a,
1684
1683
  int scale_factor);
1685
1684
 
1685
+ // nearest interpolate
1686
+ // output has the specified dimensions (ne0, ne1, ne2, ne3)
1687
+ // used in tortoise.cpp
1688
+ GGML_API struct ggml_tensor * ggml_upscale_ext(
1689
+ struct ggml_context * ctx,
1690
+ struct ggml_tensor * a,
1691
+ int ne0,
1692
+ int ne1,
1693
+ int ne2,
1694
+ int ne3);
1695
+
1686
1696
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1687
1697
  GGML_API struct ggml_tensor * ggml_pad(
1688
1698
  struct ggml_context * ctx,
@@ -1744,7 +1754,8 @@ extern "C" {
1744
1754
  struct ggml_tensor * k,
1745
1755
  struct ggml_tensor * v,
1746
1756
  struct ggml_tensor * mask,
1747
- float scale);
1757
+ float scale,
1758
+ float max_bias);
1748
1759
 
1749
1760
  GGML_API void ggml_flash_attn_ext_set_prec(
1750
1761
  struct ggml_tensor * a,