llama_cpp 0.15.1 → 0.15.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -468,7 +468,6 @@ extern "C" {
  GGML_OP_SOFT_MAX_BACK,
  GGML_OP_ROPE,
  GGML_OP_ROPE_BACK,
- GGML_OP_ALIBI,
  GGML_OP_CLAMP,
  GGML_OP_CONV_TRANSPOSE_1D,
  GGML_OP_IM2COL,
@@ -520,6 +519,7 @@ extern "C" {
  GGML_UNARY_OP_TANH,
  GGML_UNARY_OP_ELU,
  GGML_UNARY_OP_RELU,
+ GGML_UNARY_OP_SIGMOID,
  GGML_UNARY_OP_GELU,
  GGML_UNARY_OP_GELU_QUICK,
  GGML_UNARY_OP_SILU,
@@ -565,7 +565,8 @@ extern "C" {
  // n-dimensional tensor
  struct ggml_tensor {
  enum ggml_type type;
- enum ggml_backend_type backend;
+
+ GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");

  struct ggml_backend_buffer * buffer;

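The backend field of struct ggml_tensor is now deprecated in favour of inspecting the tensor's buffer. A minimal sketch of the suggested replacement, assuming this version of ggml-backend.h still exposes ggml_backend_buffer_name(); tensors that were never placed in a backend buffer have buffer == NULL:

    // Sketch: report where a tensor is stored without touching the deprecated
    // backend field. ggml_backend_buffer_name() is assumed from ggml-backend.h.
    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-backend.h"

    static void print_tensor_location(const struct ggml_tensor * t) {
        if (t->buffer == NULL) {
            printf("%s: not stored in a backend buffer\n", t->name);
        } else {
            printf("%s: stored in '%s'\n", t->name, ggml_backend_buffer_name(t->buffer));
        }
    }
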
@@ -766,7 +767,8 @@ extern "C" {
  GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
  GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars

- GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+ GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
+ GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

  // use this to compute the memory overhead of a tensor
  GGML_API size_t ggml_tensor_overhead(void);
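Alongside the re-aligned ggml_are_same_shape() declaration, ggml_are_same_stride() is new. A self-contained sketch of the difference, assuming a plain CPU context (the 16 MB pool size is an arbitrary choice):

    // A tensor and a transposed view of an equally sized tensor share a shape
    // but not a memory layout, so the two predicates disagree.
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = { .mem_size = 16*1024*1024, .mem_buffer = NULL, .no_alloc = false };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
        struct ggml_tensor * b  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
        struct ggml_tensor * bt = ggml_transpose(ctx, b);   // view with swapped strides

        printf("same shape:  %d\n", ggml_are_same_shape (a, bt));   // 1
        printf("same stride: %d\n", ggml_are_same_stride(a, bt));   // 0

        ggml_free(ctx);
        return 0;
    }
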
@@ -1074,6 +1076,14 @@ extern "C" {
  struct ggml_context * ctx,
  struct ggml_tensor * a);

+ GGML_API struct ggml_tensor * ggml_sigmoid(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_sigmoid_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
  GGML_API struct ggml_tensor * ggml_gelu(
  struct ggml_context * ctx,
  struct ggml_tensor * a);
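ggml_sigmoid() and ggml_sigmoid_inplace() back the GGML_UNARY_OP_SIGMOID entry added above. A minimal end-to-end sketch, assuming a CPU-only context and the convenience helper ggml_graph_compute_with_ctx() run on one thread:

    // Builds y = sigmoid(x) for a small f32 vector and evaluates the graph.
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = { .mem_size = 16*1024*1024, .mem_buffer = NULL, .no_alloc = false };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        ggml_set_f32(x, 0.0f);                              // sigmoid(0) == 0.5

        struct ggml_tensor * y = ggml_sigmoid(ctx, x);      // or ggml_sigmoid_inplace(ctx, x)

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, y);
        ggml_graph_compute_with_ctx(ctx, gf, 1);

        printf("sigmoid(0) = %f\n", ggml_get_f32_1d(y, 0)); // prints 0.500000
        ggml_free(ctx);
        return 0;
    }
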
@@ -1428,15 +1438,13 @@ extern "C" {
  struct ggml_context * ctx,
  struct ggml_tensor * a);

- // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
+ // fused soft_max(a*scale + mask*(ALiBi slope))
  // mask is optional
- // pos is required when max_bias > 0.0f
  // max_bias = 0.0f for no ALiBi
  GGML_API struct ggml_tensor * ggml_soft_max_ext(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  struct ggml_tensor * mask,
- struct ggml_tensor * pos,
  float scale,
  float max_bias);

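Callers of ggml_soft_max_ext() no longer pass a separate pos tensor; ALiBi is requested solely through max_bias, with the per-head slopes applied to mask inside the op. A graph-building sketch of the updated call, where the shapes in the comments and the helper name are illustrative assumptions:

    #include "ggml.h"

    // scores: [n_kv, n_tokens, n_head, 1] attention logits
    // mask:   [n_kv, n_tokens] in F32 or F16, or NULL when no masking is needed
    static struct ggml_tensor * masked_softmax(
            struct ggml_context * ctx,
            struct ggml_tensor  * scores,
            struct ggml_tensor  * mask,
            float                 scale,
            float                 max_bias) {
        // old: ggml_soft_max_ext(ctx, scores, mask, pos, scale, max_bias);
        // new: the pos tensor is gone; pass max_bias = 0.0f to disable ALiBi
        return ggml_soft_max_ext(ctx, scores, mask, scale, max_bias);
    }
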
@@ -1538,16 +1546,6 @@ extern "C" {
  float xpos_base,
  bool xpos_down);

- // alibi position embedding
- // in-place, returns view(a)
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int n_past,
- int n_head,
- float bias_max),
- "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
-
  // clamp
  // in-place, returns view(a)
  GGML_API struct ggml_tensor * ggml_clamp(
@@ -1677,12 +1675,24 @@ extern "C" {
  float p1);

  // nearest interpolate
+ // multiplies ne0 and ne1 by scale factor
  // used in stable-diffusion
  GGML_API struct ggml_tensor * ggml_upscale(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  int scale_factor);

+ // nearest interpolate
+ // nearest interpolate to specified dimensions
+ // used in tortoise.cpp
+ GGML_API struct ggml_tensor * ggml_upscale_ext(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int ne0,
+ int ne1,
+ int ne2,
+ int ne3);
+
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
  GGML_API struct ggml_tensor * ggml_pad(
  struct ggml_context * ctx,
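ggml_upscale_ext() generalises ggml_upscale(): instead of one integer factor applied to ne0 and ne1, the target extent of every dimension is given explicitly (still nearest-neighbour interpolation). A sketch where doubling the first two dimensions reproduces the old behaviour; the helper name and layout comments are assumptions:

    #include "ggml.h"

    // img: e.g. [width, height, channels, batch]
    static struct ggml_tensor * upscale_2x(struct ggml_context * ctx, struct ggml_tensor * img) {
        // old: ggml_upscale(ctx, img, 2) doubled ne0 and ne1 only
        return ggml_upscale_ext(ctx, img,
                                (int) img->ne[0]*2,   // width  * 2
                                (int) img->ne[1]*2,   // height * 2
                                (int) img->ne[2],     // channels unchanged
                                (int) img->ne[3]);    // batch unchanged
    }
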
@@ -1744,7 +1754,8 @@ extern "C" {
  struct ggml_tensor * k,
  struct ggml_tensor * v,
  struct ggml_tensor * mask,
- float scale);
+ float scale,
+ float max_bias);

  GGML_API void ggml_flash_attn_ext_set_prec(
  struct ggml_tensor * a,
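ggml_flash_attn_ext() gains a trailing max_bias argument, mirroring ggml_soft_max_ext(), so ALiBi can be fused into the attention kernel. A sketch of the updated call site; the wrapper name is an illustrative assumption:

    #include "ggml.h"

    static struct ggml_tensor * attn(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,
            struct ggml_tensor  * k,
            struct ggml_tensor  * v,
            struct ggml_tensor  * mask,
            float                 scale) {
        // old: ggml_flash_attn_ext(ctx, q, k, v, mask, scale);
        // new: pass 0.0f for no ALiBi, or a positive max_bias to enable it
        return ggml_flash_attn_ext(ctx, q, k, v, mask, scale, /*max_bias =*/ 0.0f);
    }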