cui-llama.rn 1.1.2 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +1 -2
- package/android/src/main/jni.cpp +26 -21
- package/cpp/common.cpp +181 -1584
- package/cpp/common.h +131 -52
- package/cpp/ggml-aarch64.c +612 -0
- package/cpp/ggml-alloc.h +2 -2
- package/cpp/ggml-backend.c +33 -6
- package/cpp/ggml-backend.h +2 -0
- package/cpp/ggml-common.h +20 -0
- package/cpp/ggml-impl.h +36 -7
- package/cpp/ggml-metal.m +68 -8
- package/cpp/ggml-quants.c +932 -50
- package/cpp/ggml-quants.h +15 -0
- package/cpp/ggml.c +1712 -325
- package/cpp/ggml.h +169 -100
- package/cpp/llama-grammar.cpp +721 -122
- package/cpp/llama-grammar.h +120 -15
- package/cpp/llama-impl.h +132 -1
- package/cpp/llama-sampling.cpp +1483 -354
- package/cpp/llama-sampling.h +20 -48
- package/cpp/llama-vocab.cpp +140 -7
- package/cpp/llama-vocab.h +3 -2
- package/cpp/llama.cpp +824 -327
- package/cpp/llama.h +235 -256
- package/cpp/rn-llama.hpp +18 -14
- package/cpp/sampling.cpp +353 -354
- package/cpp/sampling.h +62 -143
- package/cpp/sgemm.cpp +153 -0
- package/package.json +1 -1
- package/cpp/grammar-parser.cpp +0 -539
- package/cpp/grammar-parser.h +0 -29
package/cpp/ggml.h
CHANGED
@@ -220,7 +220,7 @@
|
|
220
220
|
#include <stdio.h>
|
221
221
|
|
222
222
|
#define LM_GGML_FILE_MAGIC 0x67676d6c // "ggml"
|
223
|
-
#define LM_GGML_FILE_VERSION
|
223
|
+
#define LM_GGML_FILE_VERSION 2
|
224
224
|
|
225
225
|
#define LM_GGML_QNT_VERSION 2 // bump this on quantization format changes
|
226
226
|
#define LM_GGML_QNT_VERSION_FACTOR 1000 // do not change this
|
@@ -231,6 +231,8 @@
|
|
231
231
|
#define LM_GGML_MAX_SRC 10
|
232
232
|
#ifndef LM_GGML_MAX_NAME
|
233
233
|
#define LM_GGML_MAX_NAME 64
|
234
|
+
#define LM_GGML_MAX_N_THREADS 512
|
235
|
+
|
234
236
|
#endif
|
235
237
|
#define LM_GGML_MAX_OP_PARAMS 64
|
236
238
|
#define LM_GGML_DEFAULT_N_THREADS 4
|
@@ -356,6 +358,7 @@ extern "C" {
|
|
356
358
|
|
357
359
|
struct lm_ggml_object;
|
358
360
|
struct lm_ggml_context;
|
361
|
+
struct lm_ggml_cgraph;
|
359
362
|
|
360
363
|
// NOTE: always add types at the end of the enum to keep backward compatibility
|
361
364
|
enum lm_ggml_type {
|
@@ -393,6 +396,8 @@ extern "C" {
|
|
393
396
|
LM_GGML_TYPE_Q4_0_4_4 = 31,
|
394
397
|
LM_GGML_TYPE_Q4_0_4_8 = 32,
|
395
398
|
LM_GGML_TYPE_Q4_0_8_8 = 33,
|
399
|
+
LM_GGML_TYPE_TQ1_0 = 34,
|
400
|
+
LM_GGML_TYPE_TQ2_0 = 35,
|
396
401
|
LM_GGML_TYPE_COUNT,
|
397
402
|
};
|
398
403
|
|
@@ -453,6 +458,8 @@ extern "C" {
|
|
453
458
|
LM_GGML_OP_SQR,
|
454
459
|
LM_GGML_OP_SQRT,
|
455
460
|
LM_GGML_OP_LOG,
|
461
|
+
LM_GGML_OP_SIN,
|
462
|
+
LM_GGML_OP_COS,
|
456
463
|
LM_GGML_OP_SUM,
|
457
464
|
LM_GGML_OP_SUM_ROWS,
|
458
465
|
LM_GGML_OP_MEAN,
|
@@ -490,9 +497,11 @@ extern "C" {
|
|
490
497
|
LM_GGML_OP_CLAMP,
|
491
498
|
LM_GGML_OP_CONV_TRANSPOSE_1D,
|
492
499
|
LM_GGML_OP_IM2COL,
|
500
|
+
LM_GGML_OP_IM2COL_BACK,
|
493
501
|
LM_GGML_OP_CONV_TRANSPOSE_2D,
|
494
502
|
LM_GGML_OP_POOL_1D,
|
495
503
|
LM_GGML_OP_POOL_2D,
|
504
|
+
LM_GGML_OP_POOL_2D_BACK,
|
496
505
|
LM_GGML_OP_UPSCALE, // nearest interpolate
|
497
506
|
LM_GGML_OP_PAD,
|
498
507
|
LM_GGML_OP_ARANGE,
|
@@ -508,6 +517,7 @@ extern "C" {
|
|
508
517
|
LM_GGML_OP_WIN_UNPART,
|
509
518
|
LM_GGML_OP_GET_REL_POS,
|
510
519
|
LM_GGML_OP_ADD_REL_POS,
|
520
|
+
LM_GGML_OP_RWKV_WKV,
|
511
521
|
|
512
522
|
LM_GGML_OP_UNARY,
|
513
523
|
|
@@ -542,6 +552,7 @@ extern "C" {
|
|
542
552
|
LM_GGML_UNARY_OP_SILU,
|
543
553
|
LM_GGML_UNARY_OP_HARDSWISH,
|
544
554
|
LM_GGML_UNARY_OP_HARDSIGMOID,
|
555
|
+
LM_GGML_UNARY_OP_EXP,
|
545
556
|
|
546
557
|
LM_GGML_UNARY_OP_COUNT,
|
547
558
|
};
|
@@ -565,23 +576,9 @@ extern "C" {
|
|
565
576
|
LM_GGML_TENSOR_FLAG_PARAM = 4,
|
566
577
|
};
|
567
578
|
|
568
|
-
// ggml object
|
569
|
-
struct lm_ggml_object {
|
570
|
-
size_t offs;
|
571
|
-
size_t size;
|
572
|
-
|
573
|
-
struct lm_ggml_object * next;
|
574
|
-
|
575
|
-
enum lm_ggml_object_type type;
|
576
|
-
|
577
|
-
char padding[4];
|
578
|
-
};
|
579
|
-
|
580
|
-
static const size_t LM_GGML_OBJECT_SIZE = sizeof(struct lm_ggml_object);
|
581
|
-
|
582
579
|
// n-dimensional tensor
|
583
580
|
struct lm_ggml_tensor {
|
584
|
-
enum lm_ggml_type type;
|
581
|
+
enum lm_ggml_type type;
|
585
582
|
|
586
583
|
LM_GGML_DEPRECATED(enum lm_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
|
587
584
|
|
@@ -624,6 +621,29 @@ extern "C" {
|
|
624
621
|
// If it returns true, the computation is aborted
|
625
622
|
typedef bool (*lm_ggml_abort_callback)(void * data);
|
626
623
|
|
624
|
+
// Scheduling priorities
|
625
|
+
enum lm_ggml_sched_priority {
|
626
|
+
LM_GGML_SCHED_PRIO_NORMAL,
|
627
|
+
LM_GGML_SCHED_PRIO_MEDIUM,
|
628
|
+
LM_GGML_SCHED_PRIO_HIGH,
|
629
|
+
LM_GGML_SCHED_PRIO_REALTIME
|
630
|
+
};
|
631
|
+
|
632
|
+
// Threadpool params
|
633
|
+
// Use lm_ggml_threadpool_params_default() or lm_ggml_threadpool_params_init() to populate the defaults
|
634
|
+
struct lm_ggml_threadpool_params {
|
635
|
+
bool cpumask[LM_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
|
636
|
+
int n_threads; // number of threads
|
637
|
+
enum lm_ggml_sched_priority prio; // thread priority
|
638
|
+
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
|
639
|
+
bool strict_cpu; // strict cpu placement
|
640
|
+
bool paused; // start in paused state
|
641
|
+
};
|
642
|
+
|
643
|
+
struct lm_ggml_threadpool; // forward declaration, see ggml.c
|
644
|
+
|
645
|
+
typedef struct lm_ggml_threadpool * lm_ggml_threadpool_t;
|
646
|
+
|
627
647
|
// the compute plan that needs to be prepared for lm_ggml_graph_compute()
|
628
648
|
// since https://github.com/ggerganov/ggml/issues/287
|
629
649
|
struct lm_ggml_cplan {
|
@@ -631,41 +651,13 @@ extern "C" {
|
|
631
651
|
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `lm_ggml_graph_compute()`
|
632
652
|
|
633
653
|
int n_threads;
|
654
|
+
struct lm_ggml_threadpool * threadpool;
|
634
655
|
|
635
656
|
// abort lm_ggml_graph_compute when true
|
636
657
|
lm_ggml_abort_callback abort_callback;
|
637
658
|
void * abort_callback_data;
|
638
659
|
};
|
639
660
|
|
640
|
-
enum lm_ggml_cgraph_eval_order {
|
641
|
-
LM_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
642
|
-
LM_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
643
|
-
LM_GGML_CGRAPH_EVAL_ORDER_COUNT
|
644
|
-
};
|
645
|
-
|
646
|
-
typedef uint32_t lm_ggml_bitset_t;
|
647
|
-
|
648
|
-
struct lm_ggml_hash_set {
|
649
|
-
size_t size;
|
650
|
-
lm_ggml_bitset_t * used;
|
651
|
-
struct lm_ggml_tensor ** keys;
|
652
|
-
};
|
653
|
-
|
654
|
-
// computation graph
|
655
|
-
struct lm_ggml_cgraph {
|
656
|
-
int size;
|
657
|
-
int n_nodes;
|
658
|
-
int n_leafs;
|
659
|
-
|
660
|
-
struct lm_ggml_tensor ** nodes;
|
661
|
-
struct lm_ggml_tensor ** grads;
|
662
|
-
struct lm_ggml_tensor ** leafs;
|
663
|
-
|
664
|
-
struct lm_ggml_hash_set visited_hash_set;
|
665
|
-
|
666
|
-
enum lm_ggml_cgraph_eval_order order;
|
667
|
-
};
|
668
|
-
|
669
661
|
// scratch buffer
|
670
662
|
struct lm_ggml_scratch {
|
671
663
|
size_t offs;
|
@@ -969,6 +961,22 @@ extern "C" {
|
|
969
961
|
struct lm_ggml_context * ctx,
|
970
962
|
struct lm_ggml_tensor * a);
|
971
963
|
|
964
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin(
|
965
|
+
struct lm_ggml_context * ctx,
|
966
|
+
struct lm_ggml_tensor * a);
|
967
|
+
|
968
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin_inplace(
|
969
|
+
struct lm_ggml_context * ctx,
|
970
|
+
struct lm_ggml_tensor * a);
|
971
|
+
|
972
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos(
|
973
|
+
struct lm_ggml_context * ctx,
|
974
|
+
struct lm_ggml_tensor * a);
|
975
|
+
|
976
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos_inplace(
|
977
|
+
struct lm_ggml_context * ctx,
|
978
|
+
struct lm_ggml_tensor * a);
|
979
|
+
|
972
980
|
// return scalar
|
973
981
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_sum(
|
974
982
|
struct lm_ggml_context * ctx,
|
@@ -1119,6 +1127,14 @@ extern "C" {
|
|
1119
1127
|
struct lm_ggml_context * ctx,
|
1120
1128
|
struct lm_ggml_tensor * a);
|
1121
1129
|
|
1130
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp(
|
1131
|
+
struct lm_ggml_context * ctx,
|
1132
|
+
struct lm_ggml_tensor * a);
|
1133
|
+
|
1134
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp_inplace(
|
1135
|
+
struct lm_ggml_context * ctx,
|
1136
|
+
struct lm_ggml_tensor * a);
|
1137
|
+
|
1122
1138
|
// normalize along rows
|
1123
1139
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_norm(
|
1124
1140
|
struct lm_ggml_context * ctx,
|
@@ -1214,7 +1230,7 @@ extern "C" {
|
|
1214
1230
|
size_t nb1,
|
1215
1231
|
size_t nb2,
|
1216
1232
|
size_t nb3,
|
1217
|
-
size_t offset);
|
1233
|
+
size_t offset); // in bytes
|
1218
1234
|
|
1219
1235
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
1220
1236
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_inplace(
|
@@ -1224,19 +1240,19 @@ extern "C" {
|
|
1224
1240
|
size_t nb1,
|
1225
1241
|
size_t nb2,
|
1226
1242
|
size_t nb3,
|
1227
|
-
size_t offset);
|
1243
|
+
size_t offset); // in bytes
|
1228
1244
|
|
1229
1245
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d(
|
1230
1246
|
struct lm_ggml_context * ctx,
|
1231
1247
|
struct lm_ggml_tensor * a,
|
1232
1248
|
struct lm_ggml_tensor * b,
|
1233
|
-
size_t offset);
|
1249
|
+
size_t offset); // in bytes
|
1234
1250
|
|
1235
1251
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d_inplace(
|
1236
1252
|
struct lm_ggml_context * ctx,
|
1237
1253
|
struct lm_ggml_tensor * a,
|
1238
1254
|
struct lm_ggml_tensor * b,
|
1239
|
-
size_t offset);
|
1255
|
+
size_t offset); // in bytes
|
1240
1256
|
|
1241
1257
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1242
1258
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d(
|
@@ -1244,7 +1260,7 @@ extern "C" {
|
|
1244
1260
|
struct lm_ggml_tensor * a,
|
1245
1261
|
struct lm_ggml_tensor * b,
|
1246
1262
|
size_t nb1,
|
1247
|
-
size_t offset);
|
1263
|
+
size_t offset); // in bytes
|
1248
1264
|
|
1249
1265
|
// b -> view(a,offset,nb1,nb2,3), return view(a)
|
1250
1266
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d_inplace(
|
@@ -1252,7 +1268,7 @@ extern "C" {
|
|
1252
1268
|
struct lm_ggml_tensor * a,
|
1253
1269
|
struct lm_ggml_tensor * b,
|
1254
1270
|
size_t nb1,
|
1255
|
-
size_t offset);
|
1271
|
+
size_t offset); // in bytes
|
1256
1272
|
|
1257
1273
|
// a -> b, return view(b)
|
1258
1274
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_cpy(
|
@@ -1566,34 +1582,49 @@ extern "C" {
|
|
1566
1582
|
float min,
|
1567
1583
|
float max);
|
1568
1584
|
|
1585
|
+
// im2col
|
1586
|
+
// converts data into a format that effectively results in a convolution when combined with matrix multiplication
|
1569
1587
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col(
|
1570
1588
|
struct lm_ggml_context * ctx,
|
1571
|
-
struct lm_ggml_tensor * a,
|
1572
|
-
struct lm_ggml_tensor * b,
|
1573
|
-
int s0,
|
1574
|
-
int s1,
|
1575
|
-
int p0,
|
1576
|
-
int p1,
|
1577
|
-
int d0,
|
1578
|
-
int d1,
|
1579
|
-
bool is_2D,
|
1580
|
-
enum lm_ggml_type dst_type);
|
1589
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1590
|
+
struct lm_ggml_tensor * b, // data
|
1591
|
+
int s0, // stride dimension 0
|
1592
|
+
int s1, // stride dimension 1
|
1593
|
+
int p0, // padding dimension 0
|
1594
|
+
int p1, // padding dimension 1
|
1595
|
+
int d0, // dilation dimension 0
|
1596
|
+
int d1, // dilation dimension 1
|
1597
|
+
bool is_2D,
|
1598
|
+
enum lm_ggml_type dst_type);
|
1599
|
+
|
1600
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col_back(
|
1601
|
+
struct lm_ggml_context * ctx,
|
1602
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1603
|
+
struct lm_ggml_tensor * b, // gradient of im2col output
|
1604
|
+
int64_t * ne, // shape of im2col input
|
1605
|
+
int s0, // stride dimension 0
|
1606
|
+
int s1, // stride dimension 1
|
1607
|
+
int p0, // padding dimension 0
|
1608
|
+
int p1, // padding dimension 1
|
1609
|
+
int d0, // dilation dimension 0
|
1610
|
+
int d1, // dilation dimension 1
|
1611
|
+
bool is_2D);
|
1581
1612
|
|
1582
1613
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
|
1583
1614
|
struct lm_ggml_context * ctx,
|
1584
|
-
struct lm_ggml_tensor * a,
|
1585
|
-
struct lm_ggml_tensor * b,
|
1586
|
-
int s0,
|
1587
|
-
int s1,
|
1588
|
-
int p0,
|
1589
|
-
int p1,
|
1590
|
-
int d0,
|
1591
|
-
int d1);
|
1615
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1616
|
+
struct lm_ggml_tensor * b, // data
|
1617
|
+
int s0, // stride dimension 0
|
1618
|
+
int s1, // stride dimension 1
|
1619
|
+
int p0, // padding dimension 0
|
1620
|
+
int p1, // padding dimension 1
|
1621
|
+
int d0, // dilation dimension 0
|
1622
|
+
int d1); // dilation dimension 1
|
1592
1623
|
|
1593
1624
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
|
1594
1625
|
struct lm_ggml_context * ctx,
|
1595
|
-
struct lm_ggml_tensor * a,
|
1596
|
-
struct lm_ggml_tensor * b,
|
1626
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1627
|
+
struct lm_ggml_tensor * b, // data
|
1597
1628
|
int s0, // stride
|
1598
1629
|
int p0, // padding
|
1599
1630
|
int d0); // dilation
|
@@ -1602,29 +1633,29 @@ extern "C" {
|
|
1602
1633
|
// alias for lm_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
1603
1634
|
LM_GGML_API struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
|
1604
1635
|
struct lm_ggml_context * ctx,
|
1605
|
-
struct lm_ggml_tensor * a,
|
1606
|
-
struct lm_ggml_tensor * b,
|
1607
|
-
int s,
|
1608
|
-
int d);
|
1636
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1637
|
+
struct lm_ggml_tensor * b, // data
|
1638
|
+
int s, // stride
|
1639
|
+
int d); // dilation
|
1609
1640
|
|
1610
1641
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
|
1611
1642
|
struct lm_ggml_context * ctx,
|
1612
|
-
struct lm_ggml_tensor * a,
|
1613
|
-
struct lm_ggml_tensor * b,
|
1614
|
-
int s0,
|
1615
|
-
int p0,
|
1616
|
-
int d0);
|
1643
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1644
|
+
struct lm_ggml_tensor * b, // data
|
1645
|
+
int s0, // stride
|
1646
|
+
int p0, // padding
|
1647
|
+
int d0); // dilation
|
1617
1648
|
|
1618
1649
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d(
|
1619
1650
|
struct lm_ggml_context * ctx,
|
1620
|
-
struct lm_ggml_tensor * a,
|
1621
|
-
struct lm_ggml_tensor * b,
|
1622
|
-
int s0,
|
1623
|
-
int s1,
|
1624
|
-
int p0,
|
1625
|
-
int p1,
|
1626
|
-
int d0,
|
1627
|
-
int d1);
|
1651
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1652
|
+
struct lm_ggml_tensor * b, // data
|
1653
|
+
int s0, // stride dimension 0
|
1654
|
+
int s1, // stride dimension 1
|
1655
|
+
int p0, // padding dimension 0
|
1656
|
+
int p1, // padding dimension 1
|
1657
|
+
int d0, // dilation dimension 0
|
1658
|
+
int d1); // dilation dimension 1
|
1628
1659
|
|
1629
1660
|
|
1630
1661
|
// kernel size is a->ne[0] x a->ne[1]
|
@@ -1686,6 +1717,18 @@ extern "C" {
|
|
1686
1717
|
float p0,
|
1687
1718
|
float p1);
|
1688
1719
|
|
1720
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_pool_2d_back(
|
1721
|
+
struct lm_ggml_context * ctx,
|
1722
|
+
struct lm_ggml_tensor * a,
|
1723
|
+
struct lm_ggml_tensor * af, // "a"/input used in forward pass
|
1724
|
+
enum lm_ggml_op_pool op,
|
1725
|
+
int k0,
|
1726
|
+
int k1,
|
1727
|
+
int s0,
|
1728
|
+
int s1,
|
1729
|
+
float p0,
|
1730
|
+
float p1);
|
1731
|
+
|
1689
1732
|
// nearest interpolate
|
1690
1733
|
// multiplies ne0 and ne1 by scale factor
|
1691
1734
|
// used in stable-diffusion
|
@@ -1840,6 +1883,15 @@ extern "C" {
|
|
1840
1883
|
struct lm_ggml_tensor * pw,
|
1841
1884
|
struct lm_ggml_tensor * ph);
|
1842
1885
|
|
1886
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_rwkv_wkv(
|
1887
|
+
struct lm_ggml_context * ctx,
|
1888
|
+
struct lm_ggml_tensor * k,
|
1889
|
+
struct lm_ggml_tensor * v,
|
1890
|
+
struct lm_ggml_tensor * r,
|
1891
|
+
struct lm_ggml_tensor * tf,
|
1892
|
+
struct lm_ggml_tensor * td,
|
1893
|
+
struct lm_ggml_tensor * state);
|
1894
|
+
|
1843
1895
|
// custom operators
|
1844
1896
|
|
1845
1897
|
typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
|
@@ -1923,8 +1975,6 @@ extern "C" {
|
|
1923
1975
|
typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
|
1924
1976
|
typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
|
1925
1977
|
|
1926
|
-
#define LM_GGML_N_TASKS_MAX -1
|
1927
|
-
|
1928
1978
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1(
|
1929
1979
|
struct lm_ggml_context * ctx,
|
1930
1980
|
struct lm_ggml_tensor * a,
|
@@ -1994,26 +2044,44 @@ extern "C" {
|
|
1994
2044
|
struct lm_ggml_context * ctx,
|
1995
2045
|
struct lm_ggml_tensor * tensor);
|
1996
2046
|
|
1997
|
-
|
1998
2047
|
LM_GGML_API void lm_ggml_build_forward_expand (struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
|
1999
2048
|
LM_GGML_API void lm_ggml_build_backward_expand(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * gf, struct lm_ggml_cgraph * gb, bool keep);
|
2000
2049
|
|
2001
2050
|
// graph allocation in a context
|
2002
|
-
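LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
|
2003
|
-
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
|
2004
|
-
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);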
|
2005
|
-
LM_GGML_API
|
2006
|
-
LM_GGML_API void
|
2007
|
-
LM_GGML_API void
|
2008
|
-
|
2051
|
+
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
|
2052
|
+
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
|
2053
|
+
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
|
2054
|
+
LM_GGML_API void lm_ggml_graph_cpy (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
|
2055
|
+
LM_GGML_API void lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // zero grads
|
2056
|
+
LM_GGML_API void lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
|
2057
|
+
|
2058
|
+
LM_GGML_API int lm_ggml_graph_size (struct lm_ggml_cgraph * cgraph);
|
2059
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_graph_node (struct lm_ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
|
2060
|
+
LM_GGML_API struct lm_ggml_tensor ** lm_ggml_graph_nodes (struct lm_ggml_cgraph * cgraph);
|
2061
|
+
LM_GGML_API int lm_ggml_graph_n_nodes(struct lm_ggml_cgraph * cgraph);
|
2062
|
+
|
2063
|
+
LM_GGML_API void lm_ggml_graph_add_node(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
|
2009
2064
|
|
2010
2065
|
LM_GGML_API size_t lm_ggml_graph_overhead(void);
|
2011
2066
|
LM_GGML_API size_t lm_ggml_graph_overhead_custom(size_t size, bool grads);
|
2012
2067
|
|
2068
|
+
LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
|
2069
|
+
LM_GGML_API void lm_ggml_threadpool_params_init (struct lm_ggml_threadpool_params * p, int n_threads);
|
2070
|
+
LM_GGML_API bool lm_ggml_threadpool_params_match (const struct lm_ggml_threadpool_params * p0, const struct lm_ggml_threadpool_params * p1);
|
2071
|
+
LM_GGML_API struct lm_ggml_threadpool * lm_ggml_threadpool_new (struct lm_ggml_threadpool_params * params);
|
2072
|
+
LM_GGML_API void lm_ggml_threadpool_free (struct lm_ggml_threadpool * threadpool);
|
2073
|
+
LM_GGML_API int lm_ggml_threadpool_get_n_threads(struct lm_ggml_threadpool * threadpool);
|
2074
|
+
LM_GGML_API void lm_ggml_threadpool_pause (struct lm_ggml_threadpool * threadpool);
|
2075
|
+
LM_GGML_API void lm_ggml_threadpool_resume (struct lm_ggml_threadpool * threadpool);
|
2076
|
+
|
2013
2077
|
// lm_ggml_graph_plan() has to be called before lm_ggml_graph_compute()
|
2014
2078
|
// when plan.work_size > 0, caller must allocate memory for plan.work_data
|
2015
|
-
LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan
|
2016
|
-
|
2079
|
+
LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan(
|
2080
|
+
const struct lm_ggml_cgraph * cgraph,
|
2081
|
+
int n_threads, /* = LM_GGML_DEFAULT_N_THREADS */
|
2082
|
+
struct lm_ggml_threadpool * threadpool /* = NULL */ );
|
2083
|
+
LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute(struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
|
2084
|
+
|
2017
2085
|
// same as lm_ggml_graph_compute() but the work data is allocated as a part of the context
|
2018
2086
|
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
|
2019
2087
|
LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute_with_ctx(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, int n_threads);
|
@@ -2402,6 +2470,7 @@ extern "C" {
|
|
2402
2470
|
LM_GGML_API int lm_ggml_cpu_has_gpublas (void);
|
2403
2471
|
LM_GGML_API int lm_ggml_cpu_has_sse3 (void);
|
2404
2472
|
LM_GGML_API int lm_ggml_cpu_has_ssse3 (void);
|
2473
|
+
LM_GGML_API int lm_ggml_cpu_has_riscv_v (void);
|
2405
2474
|
LM_GGML_API int lm_ggml_cpu_has_sycl (void);
|
2406
2475
|
LM_GGML_API int lm_ggml_cpu_has_rpc (void);
|
2407
2476
|
LM_GGML_API int lm_ggml_cpu_has_vsx (void);
|