cui-llama.rn 1.1.2 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cpp/ggml.h CHANGED
@@ -220,7 +220,7 @@
 #include <stdio.h>
 
 #define LM_GGML_FILE_MAGIC 0x67676d6c // "ggml"
-#define LM_GGML_FILE_VERSION 1
+#define LM_GGML_FILE_VERSION 2
 
 #define LM_GGML_QNT_VERSION 2 // bump this on quantization format changes
 #define LM_GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -231,6 +231,8 @@
 #define LM_GGML_MAX_SRC 10
 #ifndef LM_GGML_MAX_NAME
 #define LM_GGML_MAX_NAME 64
+#define LM_GGML_MAX_N_THREADS 512
+
 #endif
 #define LM_GGML_MAX_OP_PARAMS 64
 #define LM_GGML_DEFAULT_N_THREADS 4
@@ -356,6 +358,7 @@ extern "C" {
 
     struct lm_ggml_object;
     struct lm_ggml_context;
+    struct lm_ggml_cgraph;
 
     // NOTE: always add types at the end of the enum to keep backward compatibility
     enum lm_ggml_type {
@@ -393,6 +396,8 @@ extern "C" {
         LM_GGML_TYPE_Q4_0_4_4 = 31,
         LM_GGML_TYPE_Q4_0_4_8 = 32,
         LM_GGML_TYPE_Q4_0_8_8 = 33,
+        LM_GGML_TYPE_TQ1_0 = 34,
+        LM_GGML_TYPE_TQ2_0 = 35,
         LM_GGML_TYPE_COUNT,
     };
 
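The two ternary quantization types are appended at the end of the enum, per the backward-compatibility note above. A minimal sketch for sanity-checking what a build reports for them, assuming the usual ggml introspection helpers (`lm_ggml_type_name`, `lm_ggml_blck_size`, `lm_ggml_type_size`) behave as upstream:

```c
#include <stdio.h>
#include "ggml.h"

// Print the name, block size (elements per block) and bytes per block
// for the new ternary types. Pure table lookups; no context needed.
int main(void) {
    const enum lm_ggml_type types[] = { LM_GGML_TYPE_TQ1_0, LM_GGML_TYPE_TQ2_0 };
    for (int i = 0; i < 2; ++i) {
        printf("%-6s block=%lld bytes/block=%zu\n",
               lm_ggml_type_name(types[i]),
               (long long) lm_ggml_blck_size(types[i]),
               lm_ggml_type_size(types[i]));
    }
    return 0;
}
```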
@@ -453,6 +458,8 @@ extern "C" {
         LM_GGML_OP_SQR,
         LM_GGML_OP_SQRT,
         LM_GGML_OP_LOG,
+        LM_GGML_OP_SIN,
+        LM_GGML_OP_COS,
         LM_GGML_OP_SUM,
         LM_GGML_OP_SUM_ROWS,
         LM_GGML_OP_MEAN,
@@ -490,9 +497,11 @@ extern "C" {
         LM_GGML_OP_CLAMP,
         LM_GGML_OP_CONV_TRANSPOSE_1D,
         LM_GGML_OP_IM2COL,
+        LM_GGML_OP_IM2COL_BACK,
         LM_GGML_OP_CONV_TRANSPOSE_2D,
         LM_GGML_OP_POOL_1D,
         LM_GGML_OP_POOL_2D,
+        LM_GGML_OP_POOL_2D_BACK,
         LM_GGML_OP_UPSCALE, // nearest interpolate
         LM_GGML_OP_PAD,
         LM_GGML_OP_ARANGE,
@@ -508,6 +517,7 @@ extern "C" {
         LM_GGML_OP_WIN_UNPART,
         LM_GGML_OP_GET_REL_POS,
         LM_GGML_OP_ADD_REL_POS,
+        LM_GGML_OP_RWKV_WKV,
 
         LM_GGML_OP_UNARY,
 
@@ -542,6 +552,7 @@ extern "C" {
         LM_GGML_UNARY_OP_SILU,
         LM_GGML_UNARY_OP_HARDSWISH,
         LM_GGML_UNARY_OP_HARDSIGMOID,
+        LM_GGML_UNARY_OP_EXP,
 
         LM_GGML_UNARY_OP_COUNT,
     };
@@ -565,23 +576,9 @@ extern "C" {
         LM_GGML_TENSOR_FLAG_PARAM = 4,
     };
 
-    // ggml object
-    struct lm_ggml_object {
-        size_t offs;
-        size_t size;
-
-        struct lm_ggml_object * next;
-
-        enum lm_ggml_object_type type;
-
-        char padding[4];
-    };
-
-    static const size_t LM_GGML_OBJECT_SIZE = sizeof(struct lm_ggml_object);
-
     // n-dimensional tensor
     struct lm_ggml_tensor {
-        enum lm_ggml_type type;
+        enum lm_ggml_type type;
 
         LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
 
@@ -624,6 +621,29 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*lm_ggml_abort_callback)(void * data);
 
+    // Scheduling priorities
+    enum lm_ggml_sched_priority {
+        LM_GGML_SCHED_PRIO_NORMAL,
+        LM_GGML_SCHED_PRIO_MEDIUM,
+        LM_GGML_SCHED_PRIO_HIGH,
+        LM_GGML_SCHED_PRIO_REALTIME
+    };
+
+    // Threadpool params
+    // Use lm_ggml_threadpool_params_default() or lm_ggml_threadpool_params_init() to populate the defaults
+    struct lm_ggml_threadpool_params {
+        bool cpumask[LM_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int n_threads;                       // number of threads
+        enum lm_ggml_sched_priority prio;    // thread priority
+        uint32_t poll;                       // polling level (0 - no polling, 100 - aggressive polling)
+        bool strict_cpu;                     // strict cpu placement
+        bool paused;                         // start in paused state
+    };
+
+    struct lm_ggml_threadpool; // forward declaration, see ggml.c
+
+    typedef struct lm_ggml_threadpool * lm_ggml_threadpool_t;
+
     // the compute plan that needs to be prepared for lm_ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct lm_ggml_cplan {
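Every knob on the new params struct is a plain field, so a pool description can be filled in by hand, though `lm_ggml_threadpool_params_default()` declared further down this diff is the intended entry point. A minimal sketch, assuming upstream ggml semantics for the fields (the pinned-core choice is illustrative):

```c
#include <stdbool.h>
#include <string.h>
#include "ggml.h"

// Hand-rolled params; lm_ggml_threadpool_params_default(n) is the simpler route.
struct lm_ggml_threadpool_params make_pinned_params(void) {
    struct lm_ggml_threadpool_params p;
    memset(&p, 0, sizeof(p));      // all-zero cpumask = default affinity
    for (int i = 0; i < 4; ++i) {
        p.cpumask[i] = true;       // pin to cores 0..3
    }
    p.n_threads  = 4;
    p.prio       = LM_GGML_SCHED_PRIO_HIGH;
    p.poll       = 50;             // moderate polling (0..100)
    p.strict_cpu = true;           // enforce the placement
    p.paused     = false;          // start running
    return p;
}
```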
@@ -631,41 +651,13 @@ extern "C" {
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `lm_ggml_graph_compute()`
 
         int n_threads;
+        struct lm_ggml_threadpool * threadpool;
 
         // abort lm_ggml_graph_compute when true
         lm_ggml_abort_callback abort_callback;
         void * abort_callback_data;
     };
 
-    enum lm_ggml_cgraph_eval_order {
-        LM_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
-        LM_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
-        LM_GGML_CGRAPH_EVAL_ORDER_COUNT
-    };
-
-    typedef uint32_t lm_ggml_bitset_t;
-
-    struct lm_ggml_hash_set {
-        size_t size;
-        lm_ggml_bitset_t * used;
-        struct lm_ggml_tensor ** keys;
-    };
-
-    // computation graph
-    struct lm_ggml_cgraph {
-        int size;
-        int n_nodes;
-        int n_leafs;
-
-        struct lm_ggml_tensor ** nodes;
-        struct lm_ggml_tensor ** grads;
-        struct lm_ggml_tensor ** leafs;
-
-        struct lm_ggml_hash_set visited_hash_set;
-
-        enum lm_ggml_cgraph_eval_order order;
-    };
-
     // scratch buffer
     struct lm_ggml_scratch {
         size_t offs;
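With `struct lm_ggml_cgraph` gone from the public header (only the forward declaration added earlier remains), callers can no longer reach into `cgraph->nodes` or `cgraph->n_nodes`; the accessor functions added further down this diff take over. A sketch of the migration for a typical node-dumping loop:

```c
#include <stdio.h>
#include "ggml.h"

// Before (1.1.2): direct field access on the public struct
//   for (int i = 0; i < gf->n_nodes; ++i) { ... gf->nodes[i] ... }

// After (1.1.5): the cgraph is opaque; go through the accessors
static void dump_graph_nodes(struct lm_ggml_cgraph * gf) {
    for (int i = 0; i < lm_ggml_graph_n_nodes(gf); ++i) {
        struct lm_ggml_tensor * node = lm_ggml_graph_node(gf, i);
        printf("node %3d: %-12s %s\n", i,
               lm_ggml_op_name(node->op), node->name);
    }
}
```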
@@ -969,6 +961,22 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor * a);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a);
+
     // return scalar
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_sum(
             struct lm_ggml_context * ctx,
@@ -1119,6 +1127,14 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor * a);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a);
+
     // normalize along rows
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_norm(
             struct lm_ggml_context * ctx,
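Together with `lm_ggml_sin`/`lm_ggml_cos` above, `lm_ggml_exp` rounds out the new element-wise math ops. A minimal end-to-end sketch that builds and runs a graph over a small F32 tensor; the context size is a rough guess and error handling is elided:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct lm_ggml_init_params params = {
        .mem_size   = 16 * 1024 * 1024, // 16 MB arena for tensors + graph
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
    struct lm_ggml_context * ctx = lm_ggml_init(params);

    struct lm_ggml_tensor * x = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_F32, 8);
    for (int i = 0; i < 8; ++i) {
        ((float *) x->data)[i] = 0.25f * (float) i;
    }

    struct lm_ggml_tensor * s = lm_ggml_sin(ctx, x);
    struct lm_ggml_tensor * c = lm_ggml_cos(ctx, x);
    struct lm_ggml_tensor * e = lm_ggml_exp(ctx, x);

    struct lm_ggml_cgraph * gf = lm_ggml_new_graph(ctx);
    lm_ggml_build_forward_expand(gf, s);
    lm_ggml_build_forward_expand(gf, c);
    lm_ggml_build_forward_expand(gf, e);

    lm_ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/4);

    printf("sin(1.0) ~ %f\n", ((float *) s->data)[4]); // x[4] = 1.0
    lm_ggml_free(ctx);
    return 0;
}
```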
@@ -1214,7 +1230,7 @@ extern "C" {
             size_t nb1,
             size_t nb2,
             size_t nb3,
-            size_t offset);
+            size_t offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_inplace(
@@ -1224,19 +1240,19 @@ extern "C" {
             size_t nb1,
             size_t nb2,
             size_t nb3,
-            size_t offset);
+            size_t offset); // in bytes
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor * a,
             struct lm_ggml_tensor * b,
-            size_t offset);
+            size_t offset); // in bytes
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d_inplace(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor * a,
             struct lm_ggml_tensor * b,
-            size_t offset);
+            size_t offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return modified a
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d(
@@ -1244,7 +1260,7 @@ extern "C" {
             struct lm_ggml_tensor * a,
             struct lm_ggml_tensor * b,
             size_t nb1,
-            size_t offset);
+            size_t offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d_inplace(
@@ -1252,7 +1268,7 @@ extern "C" {
             struct lm_ggml_tensor * a,
             struct lm_ggml_tensor * b,
             size_t nb1,
-            size_t offset);
+            size_t offset); // in bytes
 
     // a -> b, return view(b)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_cpy(
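The `// in bytes` comments settle a recurring point of confusion: `offset` is a byte offset, not an element index. A small hypothetical helper (`set_at_index` is not part of the API) that scales an element index by the element stride `nb[0]`:

```c
// Paste b into a starting at element index `idx`.
// lm_ggml_set_1d takes a *byte* offset, so scale by the element stride nb[0].
static struct lm_ggml_tensor * set_at_index(
        struct lm_ggml_context * ctx,
        struct lm_ggml_tensor  * a,   // destination, F32
        struct lm_ggml_tensor  * b,   // source, F32, fits inside a
        int64_t                  idx) {
    const size_t offset = (size_t) idx * a->nb[0]; // bytes, not elements
    return lm_ggml_set_1d(ctx, a, b, offset);
}
```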
@@ -1566,34 +1582,49 @@ extern "C" {
             float min,
             float max);
 
+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor * a,
-            struct lm_ggml_tensor * b,
-            int s0,
-            int s1,
-            int p0,
-            int p1,
-            int d0,
-            int d1,
-            bool is_2D,
-            enum lm_ggml_type dst_type);
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // data
+            int s0,                    // stride dimension 0
+            int s1,                    // stride dimension 1
+            int p0,                    // padding dimension 0
+            int p1,                    // padding dimension 1
+            int d0,                    // dilation dimension 0
+            int d1,                    // dilation dimension 1
+            bool is_2D,
+            enum lm_ggml_type dst_type);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col_back(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // gradient of im2col output
+            int64_t * ne,              // shape of im2col input
+            int s0,                    // stride dimension 0
+            int s1,                    // stride dimension 1
+            int p0,                    // padding dimension 0
+            int p1,                    // padding dimension 1
+            int d0,                    // dilation dimension 0
+            int d1,                    // dilation dimension 1
+            bool is_2D);
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor * a,
-            struct lm_ggml_tensor * b,
-            int s0,
-            int s1,
-            int p0,
-            int p1,
-            int d0,
-            int d1);
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // data
+            int s0,                    // stride dimension 0
+            int s1,                    // stride dimension 1
+            int p0,                    // padding dimension 0
+            int p1,                    // padding dimension 1
+            int d0,                    // dilation dimension 0
+            int d1);                   // dilation dimension 1
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor * a,
-            struct lm_ggml_tensor * b,
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // data
             int s0,  // stride
             int p0,  // padding
             int d0); // dilation
@@ -1602,29 +1633,29 @@ extern "C" {
     // alias for lm_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     LM_GGML_API struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor * a,
-            struct lm_ggml_tensor * b,
-            int s,
-            int d);
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // data
+            int s,  // stride
+            int d); // dilation
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor * a,
-            struct lm_ggml_tensor * b,
-            int s0,
-            int p0,
-            int d0);
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // data
+            int s0,  // stride
+            int p0,  // padding
+            int d0); // dilation
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor * a,
-            struct lm_ggml_tensor * b,
-            int s0,
-            int s1,
-            int p0,
-            int p1,
-            int d0,
-            int d1);
+            struct lm_ggml_tensor * a, // convolution kernel
+            struct lm_ggml_tensor * b, // data
+            int s0,  // stride dimension 0
+            int s1,  // stride dimension 1
+            int p0,  // padding dimension 0
+            int p1,  // padding dimension 1
+            int d0,  // dilation dimension 0
+            int d1); // dilation dimension 1
 
 
     // kernel size is a->ne[0] x a->ne[1]
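The parameter comments make the convolution entry points self-describing. A sketch of a "same"-padded 3x3 convolution, assuming the usual ggml layouts (kernel `a` = [KW, KH, IC, OC], data `b` = [W, H, C, N]) and an F16 kernel for the im2col path:

```c
#include "ggml.h"

// "Same"-padded 3x3 convolution over a single-channel image.
static struct lm_ggml_tensor * conv3x3_same(
        struct lm_ggml_context * ctx,
        struct lm_ggml_tensor  * kernel, // F16 [3, 3, 1, OC]
        struct lm_ggml_tensor  * image)  // F32 [W, H, 1, 1]
{
    return lm_ggml_conv_2d(ctx, kernel, image,
                           /*s0=*/1, /*s1=*/1,   // stride
                           /*p0=*/1, /*p1=*/1,   // padding keeps W, H
                           /*d0=*/1, /*d1=*/1);  // no dilation
}
```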
@@ -1686,6 +1717,18 @@ extern "C" {
             float p0,
             float p1);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_pool_2d_back(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * a,
+            struct lm_ggml_tensor * af, // "a"/input used in forward pass
+            enum lm_ggml_op_pool op,
+            int k0,
+            int k1,
+            int s0,
+            int s1,
+            float p0,
+            float p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
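`lm_ggml_pool_2d_back` pairs with the existing `lm_ggml_pool_2d` for training-time gradients: `a` carries the gradient w.r.t. the pooled output and `af` is the original forward input. A fragment sketching the pairing for 2x2 average pooling, assuming `ctx`, `x` and `grad` already exist:

```c
// Forward: 2x2 average pooling with stride 2, no padding.
struct lm_ggml_tensor * pooled = lm_ggml_pool_2d(ctx, x,
        LM_GGML_OP_POOL_AVG, /*k0=*/2, /*k1=*/2, /*s0=*/2, /*s1=*/2, 0.0f, 0.0f);

// Backward: same geometry; `grad` has the shape of `pooled`, and `x` is
// passed again as `af` so gradients can be routed back to the input shape.
struct lm_ggml_tensor * dx = lm_ggml_pool_2d_back(ctx, grad, x,
        LM_GGML_OP_POOL_AVG, 2, 2, 2, 2, 0.0f, 0.0f);
```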
@@ -1840,6 +1883,15 @@ extern "C" {
             struct lm_ggml_tensor * pw,
             struct lm_ggml_tensor * ph);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_rwkv_wkv(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor * k,
+            struct lm_ggml_tensor * v,
+            struct lm_ggml_tensor * r,
+            struct lm_ggml_tensor * tf,
+            struct lm_ggml_tensor * td,
+            struct lm_ggml_tensor * state);
+
 
     // custom operators
     typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1923,8 +1975,6 @@ extern "C" {
     typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
 
-#define LM_GGML_N_TASKS_MAX -1
-
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor * a,
@@ -1994,26 +2044,44 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor * tensor);
 
-
     LM_GGML_API void lm_ggml_build_forward_expand (struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool keep);
 
     // graph allocation in a context
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom (struct lm_ggml_context * ctx, size_t size, bool grads);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
-    LM_GGML_API struct lm_ggml_cgraph lm_ggml_graph_view (struct lm_ggml_cgraph * cgraph, int i0, int i1);
-    LM_GGML_API void lm_ggml_graph_cpy (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
-    LM_GGML_API void lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // zero grads
-    LM_GGML_API void lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API void lm_ggml_graph_cpy (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
+    LM_GGML_API void lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // zero grads
+    LM_GGML_API void lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
+
+    LM_GGML_API int lm_ggml_graph_size (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_node (struct lm_ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
+    LM_GGML_API struct lm_ggml_tensor ** lm_ggml_graph_nodes (struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API int lm_ggml_graph_n_nodes(struct lm_ggml_cgraph * cgraph);
+
+    LM_GGML_API void lm_ggml_graph_add_node(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
 
     LM_GGML_API size_t lm_ggml_graph_overhead(void);
     LM_GGML_API size_t lm_ggml_graph_overhead_custom(size_t size, bool grads);
 
+    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
+    LM_GGML_API void lm_ggml_threadpool_params_init (struct lm_ggml_threadpool_params * p, int n_threads);
+    LM_GGML_API bool lm_ggml_threadpool_params_match (const struct lm_ggml_threadpool_params * p0, const struct lm_ggml_threadpool_params * p1);
+    LM_GGML_API struct lm_ggml_threadpool * lm_ggml_threadpool_new (struct lm_ggml_threadpool_params * params);
+    LM_GGML_API void lm_ggml_threadpool_free (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API int lm_ggml_threadpool_get_n_threads(struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void lm_ggml_threadpool_pause (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void lm_ggml_threadpool_resume (struct lm_ggml_threadpool * threadpool);
+
     // lm_ggml_graph_plan() has to be called before lm_ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan (const struct lm_ggml_cgraph * cgraph, int n_threads /*= LM_GGML_DEFAULT_N_THREADS*/);
-    LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute( struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
+    LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan(
+            const struct lm_ggml_cgraph * cgraph,
+            int n_threads, /* = LM_GGML_DEFAULT_N_THREADS */
+            struct lm_ggml_threadpool * threadpool /* = NULL */ );
+    LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute(struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
+
     // same as lm_ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
     LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute_with_ctx(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, int n_threads);
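`lm_ggml_graph_plan()` now takes the threadpool explicitly (pass NULL to keep the old one-off-threads behavior). A minimal sketch of the new plan/compute flow on an explicitly managed pool, assuming a graph `gf` already built in a context; error handling is elided:

```c
#include <stdlib.h>
#include "ggml.h"

// Run a graph on an explicitly managed threadpool.
enum lm_ggml_status run_with_threadpool(struct lm_ggml_cgraph * gf) {
    // Defaults: no cpumask pinning, normal priority.
    struct lm_ggml_threadpool_params tpp = lm_ggml_threadpool_params_default(4);
    struct lm_ggml_threadpool * tp = lm_ggml_threadpool_new(&tpp);

    // The plan carries the threadpool through to compute.
    struct lm_ggml_cplan plan = lm_ggml_graph_plan(gf, tpp.n_threads, tp);
    uint8_t * work = NULL;
    if (plan.work_size > 0) {
        work = malloc(plan.work_size); // caller-owned work buffer
        plan.work_data = work;
    }

    enum lm_ggml_status st = lm_ggml_graph_compute(gf, &plan);

    free(work);
    lm_ggml_threadpool_free(tp);
    return st;
}
```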
@@ -2402,6 +2470,7 @@ extern "C" {
     LM_GGML_API int lm_ggml_cpu_has_gpublas (void);
     LM_GGML_API int lm_ggml_cpu_has_sse3    (void);
     LM_GGML_API int lm_ggml_cpu_has_ssse3   (void);
+    LM_GGML_API int lm_ggml_cpu_has_riscv_v (void);
     LM_GGML_API int lm_ggml_cpu_has_sycl    (void);
     LM_GGML_API int lm_ggml_cpu_has_rpc     (void);
     LM_GGML_API int lm_ggml_cpu_has_vsx     (void);
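Like the other feature probes, the new RISC-V vector-extension check is a plain 0/1 getter:

```c
#include <stdio.h>
#include "ggml.h"

// Report a couple of CPU feature flags, including the new RISC-V V probe.
int main(void) {
    printf("ssse3:   %d\n", lm_ggml_cpu_has_ssse3());
    printf("riscv_v: %d\n", lm_ggml_cpu_has_riscv_v());
    return 0;
}
```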