llama_cpp 0.9.5 → 0.10.1

This diff shows the changes between two publicly released versions of this package, as published to one of the supported public registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the registry.
@@ -215,9 +215,9 @@
215
215
  #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
216
216
 
217
217
  #define GGML_MAX_DIMS 4
218
- #define GGML_MAX_PARAMS 1024
218
+ #define GGML_MAX_PARAMS 2048
219
219
  #define GGML_MAX_CONTEXTS 64
220
- #define GGML_MAX_SRC 6
220
+ #define GGML_MAX_SRC 10
221
221
  #define GGML_MAX_NAME 64
222
222
  #define GGML_MAX_OP_PARAMS 64
223
223
  #define GGML_DEFAULT_N_THREADS 4
@@ -283,6 +283,20 @@
283
283
  const type prefix##3 = (pointer)->array[3]; \
284
284
  GGML_UNUSED(prefix##3);
285
285
 
286
+ #define GGML_TENSOR_UNARY_OP_LOCALS \
287
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
288
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
289
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
290
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
291
+
292
+ #define GGML_TENSOR_BINARY_OP_LOCALS \
293
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
294
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
295
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
296
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
297
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
298
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
299
+
286
300
  #ifdef __cplusplus
287
301
  extern "C" {
288
302
  #endif
@@ -381,6 +395,7 @@ extern "C" {
381
395
  GGML_OP_GROUP_NORM,
382
396
 
383
397
  GGML_OP_MUL_MAT,
398
+ GGML_OP_MUL_MAT_ID,
384
399
  GGML_OP_OUT_PROD,
385
400
 
386
401
  GGML_OP_SCALE,
@@ -407,8 +422,10 @@ extern "C" {
407
422
  GGML_OP_CONV_TRANSPOSE_2D,
408
423
  GGML_OP_POOL_1D,
409
424
  GGML_OP_POOL_2D,
410
-
411
425
  GGML_OP_UPSCALE, // nearest interpolate
426
+ GGML_OP_PAD,
427
+ GGML_OP_ARGSORT,
428
+ GGML_OP_LEAKY_RELU,
412
429
 
413
430
  GGML_OP_FLASH_ATTN,
414
431
  GGML_OP_FLASH_FF,
@@ -448,7 +465,8 @@ extern "C" {
448
465
  GGML_UNARY_OP_GELU,
449
466
  GGML_UNARY_OP_GELU_QUICK,
450
467
  GGML_UNARY_OP_SILU,
451
- GGML_UNARY_OP_LEAKY
468
+
469
+ GGML_UNARY_OP_COUNT,
452
470
  };
453
471
 
454
472
  enum ggml_object_type {
@@ -484,7 +502,6 @@ extern "C" {
484
502
 
485
503
  struct ggml_backend_buffer * buffer;
486
504
 
487
- int n_dims;
488
505
  int64_t ne[GGML_MAX_DIMS]; // number of elements
489
506
  size_t nb[GGML_MAX_DIMS]; // stride in bytes:
490
507
  // nb[0] = ggml_type_size(type)
@@ -516,7 +533,7 @@ extern "C" {
516
533
 
517
534
  void * extra; // extra things e.g. for ggml-cuda.cu
518
535
 
519
- char padding[12];
536
+ char padding[8];
520
537
  };
521
538
 
522
539
  static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -621,16 +638,22 @@ extern "C" {
621
638
  GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
622
639
  GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
623
640
  GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
624
- GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
625
641
 
626
- GGML_API int ggml_blck_size (enum ggml_type type);
627
- GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
628
- GGML_API float ggml_type_sizef(enum ggml_type type); // ggml_type_size()/ggml_blck_size() as float
642
+ GGML_API int ggml_blck_size(enum ggml_type type);
643
+ GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
644
+ GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
645
+
646
+ GGML_DEPRECATED(
647
+ GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
648
+ "use ggml_row_size() instead");
629
649
 
630
650
  GGML_API const char * ggml_type_name(enum ggml_type type);
631
651
  GGML_API const char * ggml_op_name (enum ggml_op op);
632
652
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
633
653
 
654
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
655
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
656
+
634
657
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
635
658
 
636
659
  GGML_API bool ggml_is_quantized(enum ggml_type type);
@@ -641,6 +664,11 @@ extern "C" {
641
664
  GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
642
665
  GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
643
666
  GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
667
+ GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
668
+ GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
669
+ GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
670
+ GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
671
+ GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
644
672
 
645
673
  GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
646
674
 
@@ -773,6 +801,9 @@ extern "C" {
773
801
  struct ggml_tensor * a,
774
802
  struct ggml_tensor * b);
775
803
 
804
+ // dst = a
805
+ // view(dst, nb1, nb2, nb3, offset) += b
806
+ // return dst
776
807
  GGML_API struct ggml_tensor * ggml_acc(
777
808
  struct ggml_context * ctx,
778
809
  struct ggml_tensor * a,
@@ -937,15 +968,14 @@ extern "C" {
937
968
  struct ggml_context * ctx,
938
969
  struct ggml_tensor * a);
939
970
 
940
- GGML_API struct ggml_tensor * ggml_leaky(
971
+ GGML_API struct ggml_tensor * ggml_leaky_relu(
941
972
  struct ggml_context * ctx,
942
- struct ggml_tensor * a);
973
+ struct ggml_tensor * a, float negative_slope, bool inplace);
943
974
 
944
975
  GGML_API struct ggml_tensor * ggml_relu_inplace(
945
976
  struct ggml_context * ctx,
946
977
  struct ggml_tensor * a);
947
978
 
948
- // TODO: double-check this computation is correct
949
979
  GGML_API struct ggml_tensor * ggml_gelu(
950
980
  struct ggml_context * ctx,
951
981
  struct ggml_tensor * a);
@@ -1027,6 +1057,16 @@ extern "C" {
1027
1057
  struct ggml_tensor * a,
1028
1058
  struct ggml_tensor * b);
1029
1059
 
1060
+ // indirect matrix multiplication
1061
+ // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
1062
+ GGML_API struct ggml_tensor * ggml_mul_mat_id(
1063
+ struct ggml_context * ctx,
1064
+ struct ggml_tensor * const as[],
1065
+ int n_as,
1066
+ struct ggml_tensor * ids,
1067
+ int id,
1068
+ struct ggml_tensor * b);
1069
+
1030
1070
  // A: m columns, n rows,
1031
1071
  // B: p columns, n rows,
1032
1072
  // result is m columns, p rows
@@ -1234,6 +1274,7 @@ extern "C" {
1234
1274
  struct ggml_context * ctx,
1235
1275
  struct ggml_tensor * a);
1236
1276
 
1277
+ // supports 3D: a->ne[2] == b->ne[1]
1237
1278
  GGML_API struct ggml_tensor * ggml_get_rows(
1238
1279
  struct ggml_context * ctx,
1239
1280
  struct ggml_tensor * a,
@@ -1520,6 +1561,32 @@ extern "C" {
1520
1561
  struct ggml_tensor * a,
1521
1562
  int scale_factor);
1522
1563
 
1564
+ // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1565
+ GGML_API struct ggml_tensor * ggml_pad(
1566
+ struct ggml_context * ctx,
1567
+ struct ggml_tensor * a,
1568
+ int p0,
1569
+ int p1,
1570
+ int p2,
1571
+ int p3);
1572
+
1573
+ // sort rows
1574
+ enum ggml_sort_order {
1575
+ GGML_SORT_ASC,
1576
+ GGML_SORT_DESC,
1577
+ };
1578
+
1579
+ GGML_API struct ggml_tensor * ggml_argsort(
1580
+ struct ggml_context * ctx,
1581
+ struct ggml_tensor * a,
1582
+ enum ggml_sort_order order);
1583
+
1584
+ // top k elements per row
1585
+ GGML_API struct ggml_tensor * ggml_top_k(
1586
+ struct ggml_context * ctx,
1587
+ struct ggml_tensor * a,
1588
+ int k);
1589
+
1523
1590
  GGML_API struct ggml_tensor * ggml_flash_attn(
1524
1591
  struct ggml_context * ctx,
1525
1592
  struct ggml_tensor * q,
@@ -1581,7 +1648,6 @@ extern "C" {
1581
1648
  int kh);
1582
1649
 
1583
1650
  // used in sam
1584
-
1585
1651
  GGML_API struct ggml_tensor * ggml_add_rel_pos(
1586
1652
  struct ggml_context * ctx,
1587
1653
  struct ggml_tensor * a,
@@ -1756,7 +1822,7 @@ extern "C" {
1756
1822
  GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
1757
1823
  GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
1758
1824
  GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
1759
- GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
1825
+ GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
1760
1826
  GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
1761
1827
  GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
1762
1828
  GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);