cui-llama.rn 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +1 -2
- package/android/src/main/jni.cpp +26 -21
- package/cpp/common.cpp +2028 -1520
- package/cpp/common.h +134 -18
- package/cpp/ggml-aarch64.c +612 -0
- package/cpp/ggml-alloc.h +2 -2
- package/cpp/ggml-backend.c +33 -6
- package/cpp/ggml-backend.h +2 -0
- package/cpp/ggml-common.h +20 -0
- package/cpp/ggml-impl.h +4 -7
- package/cpp/ggml-metal.m +63 -2
- package/cpp/ggml-quants.c +690 -2
- package/cpp/ggml-quants.h +15 -0
- package/cpp/ggml.c +1650 -317
- package/cpp/ggml.h +155 -48
- package/cpp/llama-grammar.cpp +721 -122
- package/cpp/llama-grammar.h +120 -15
- package/cpp/llama-impl.h +132 -1
- package/cpp/llama-sampling.cpp +1361 -356
- package/cpp/llama-sampling.h +20 -48
- package/cpp/llama-vocab.cpp +140 -7
- package/cpp/llama-vocab.h +3 -2
- package/cpp/llama.cpp +810 -307
- package/cpp/llama.h +213 -259
- package/cpp/rn-llama.hpp +17 -14
- package/cpp/sampling.cpp +347 -355
- package/cpp/sampling.h +106 -135
- package/cpp/sgemm.cpp +153 -0
- package/package.json +1 -1
- package/cpp/grammar-parser.cpp +0 -539
- package/cpp/grammar-parser.h +0 -29
package/cpp/ggml.h
CHANGED
@@ -220,7 +220,7 @@
 #include <stdio.h>
 
 #define LM_GGML_FILE_MAGIC   0x67676d6c // "ggml"
-#define LM_GGML_FILE_VERSION 1
+#define LM_GGML_FILE_VERSION 2
 
 #define LM_GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define LM_GGML_QNT_VERSION_FACTOR 1000 // do not change this
@@ -231,6 +231,8 @@
 #define LM_GGML_MAX_SRC         10
 #ifndef LM_GGML_MAX_NAME
 #define LM_GGML_MAX_NAME        64
+#define LM_GGML_MAX_N_THREADS   512
+
 #endif
 #define LM_GGML_MAX_OP_PARAMS   64
 #define LM_GGML_DEFAULT_N_THREADS 4
@@ -393,6 +395,8 @@ extern "C" {
         LM_GGML_TYPE_Q4_0_4_4 = 31,
         LM_GGML_TYPE_Q4_0_4_8 = 32,
         LM_GGML_TYPE_Q4_0_8_8 = 33,
+        LM_GGML_TYPE_TQ1_0    = 34,
+        LM_GGML_TYPE_TQ2_0    = 35,
         LM_GGML_TYPE_COUNT,
     };
 
@@ -453,6 +457,8 @@ extern "C" {
         LM_GGML_OP_SQR,
         LM_GGML_OP_SQRT,
         LM_GGML_OP_LOG,
+        LM_GGML_OP_SIN,
+        LM_GGML_OP_COS,
         LM_GGML_OP_SUM,
         LM_GGML_OP_SUM_ROWS,
         LM_GGML_OP_MEAN,
@@ -490,9 +496,11 @@ extern "C" {
         LM_GGML_OP_CLAMP,
         LM_GGML_OP_CONV_TRANSPOSE_1D,
         LM_GGML_OP_IM2COL,
+        LM_GGML_OP_IM2COL_BACK,
         LM_GGML_OP_CONV_TRANSPOSE_2D,
         LM_GGML_OP_POOL_1D,
         LM_GGML_OP_POOL_2D,
+        LM_GGML_OP_POOL_2D_BACK,
         LM_GGML_OP_UPSCALE, // nearest interpolate
         LM_GGML_OP_PAD,
         LM_GGML_OP_ARANGE,
@@ -508,6 +516,7 @@ extern "C" {
         LM_GGML_OP_WIN_UNPART,
         LM_GGML_OP_GET_REL_POS,
         LM_GGML_OP_ADD_REL_POS,
+        LM_GGML_OP_RWKV_WKV,
 
         LM_GGML_OP_UNARY,
 
@@ -542,6 +551,7 @@ extern "C" {
         LM_GGML_UNARY_OP_SILU,
         LM_GGML_UNARY_OP_HARDSWISH,
         LM_GGML_UNARY_OP_HARDSIGMOID,
+        LM_GGML_UNARY_OP_EXP,
 
         LM_GGML_UNARY_OP_COUNT,
     };
@@ -624,6 +634,29 @@ extern "C" {
     // If it returns true, the computation is aborted
     typedef bool (*lm_ggml_abort_callback)(void * data);
 
+    // Scheduling priorities
+    enum lm_ggml_sched_priority {
+        LM_GGML_SCHED_PRIO_NORMAL,
+        LM_GGML_SCHED_PRIO_MEDIUM,
+        LM_GGML_SCHED_PRIO_HIGH,
+        LM_GGML_SCHED_PRIO_REALTIME
+    };
+
+    // Threadpool params
+    // Use lm_ggml_threadpool_params_default() or lm_ggml_threadpool_params_init() to populate the defaults
+    struct lm_ggml_threadpool_params {
+        bool                cpumask[LM_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int                 n_threads;                      // number of threads
+        enum lm_ggml_sched_priority prio;                   // thread priority
+        uint32_t            poll;                           // polling level (0 - no polling, 100 - aggressive polling)
+        bool                strict_cpu;                     // strict cpu placement
+        bool                paused;                         // start in paused state
+    };
+
+    struct lm_ggml_threadpool; // forward declaration, see ggml.c
+
+    typedef struct lm_ggml_threadpool * lm_ggml_threadpool_t;
+
     // the compute plan that needs to be prepared for lm_ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct lm_ggml_cplan {
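The new `lm_ggml_threadpool_params` struct makes thread count, priority, and CPU placement explicit. A minimal sketch of filling it by hand (core count, priority, and polling level here are illustrative; the header recommends `lm_ggml_threadpool_params_init()` for defaults, and an all-zero `cpumask` keeps the default affinity):

```c
#include <string.h>
#include "ggml.h"

// Hypothetical helper: pin n threads to cores 0..n-1 at high priority.
static struct lm_ggml_threadpool_params make_pinned_params(int n) {
    struct lm_ggml_threadpool_params p;
    memset(&p, 0, sizeof(p));          // all-zero cpumask = default affinity
    p.n_threads  = n;
    p.prio       = LM_GGML_SCHED_PRIO_HIGH;
    p.poll       = 50;                 // moderate polling
    p.strict_cpu = true;               // honor the cpumask strictly
    p.paused     = false;              // start running, not paused
    for (int i = 0; i < n && i < LM_GGML_MAX_N_THREADS; ++i) {
        p.cpumask[i] = true;           // one flag per core
    }
    return p;
}
```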
@@ -631,6 +664,7 @@ extern "C" {
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `lm_ggml_graph_compute()`
 
         int n_threads;
+        struct lm_ggml_threadpool * threadpool;
 
         // abort lm_ggml_graph_compute when true
         lm_ggml_abort_callback abort_callback;
@@ -647,8 +681,8 @@ extern "C" {
 
     struct lm_ggml_hash_set {
         size_t size;
-        lm_ggml_bitset_t * used;
-        struct lm_ggml_tensor ** keys;
+        lm_ggml_bitset_t * used;       // whether or not the keys are in use i.e. set
+        struct lm_ggml_tensor ** keys; // actual tensors in the set, keys[i] is only defined if lm_ggml_bitset_get(used, i)
     };
 
     // computation graph
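The new comments spell out the `lm_ggml_hash_set` invariant: a slot's key is meaningful only when its bit in `used` is set. A sketch of a traversal that respects it, assuming `lm_ggml_bitset_get()` is visible to the caller (it lives in the library's internal headers):

```c
// Visit every tensor stored in the set, skipping unused slots whose
// keys[i] entries are undefined per the invariant above.
static void hash_set_for_each(const struct lm_ggml_hash_set * set,
                              void (*fn)(struct lm_ggml_tensor * t)) {
    for (size_t i = 0; i < set->size; ++i) {
        if (lm_ggml_bitset_get(set->used, i)) {
            fn(set->keys[i]);
        }
    }
}
```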
@@ -969,6 +1003,22 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_sin_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_cos_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
     // return scalar
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_sum(
             struct lm_ggml_context * ctx,
@@ -1119,6 +1169,14 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_exp_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
     // normalize along rows
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_norm(
             struct lm_ggml_context * ctx,
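With `lm_ggml_sin`, `lm_ggml_cos`, and `lm_ggml_exp` now in the public API, element-wise trig/exponential graphs can be composed directly. A minimal sketch (the function name is illustrative):

```c
#include "ggml.h"

// Build a node computing exp(sin(x)) element-wise; the *_inplace
// variants would reuse x's buffer instead of allocating new nodes.
static struct lm_ggml_tensor * exp_sin(struct lm_ggml_context * ctx,
                                       struct lm_ggml_tensor  * x) {
    return lm_ggml_exp(ctx, lm_ggml_sin(ctx, x));
}
```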
@@ -1214,7 +1272,7 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_inplace(
@@ -1224,19 +1282,19 @@ extern "C" {
             size_t                nb1,
             size_t                nb2,
             size_t                nb3,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_1d_inplace(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return modified a
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d(
@@ -1244,7 +1302,7 @@ extern "C" {
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // b -> view(a,offset,nb1,nb2,3), return view(a)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_set_2d_inplace(
@@ -1252,7 +1310,7 @@ extern "C" {
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b,
             size_t                nb1,
-            size_t                offset);
+            size_t                offset); // in bytes
 
     // a -> b, return view(b)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_cpy(
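The `// in bytes` comments settle a recurring point of confusion: the `offset` arguments of the `lm_ggml_set_*` family are byte offsets, not element indices. A sketch of writing one row of a 2D tensor using the row stride `nb[1]`, which is already expressed in bytes (helper name is illustrative):

```c
#include "ggml.h"

// Hypothetical helper: overwrite row `row` of 2D tensor a with b.
static struct lm_ggml_tensor * set_row(struct lm_ggml_context * ctx,
                                       struct lm_ggml_tensor  * a,
                                       struct lm_ggml_tensor  * b,
                                       int64_t                  row) {
    // nb[1] is the byte stride between consecutive rows of a
    return lm_ggml_set_1d(ctx, a, b, (size_t) row * a->nb[1]);
}
```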
@@ -1566,34 +1624,49 @@ extern "C" {
             float                 min,
             float                 max);
 
+    // im2col
+    // converts data into a format that effectively results in a convolution when combined with matrix multiplication
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                      s0,
-            int                      s1,
-            int                      p0,
-            int                      p1,
-            int                      d0,
-            int                      d1,
-            bool                     is_2D,
-            enum lm_ggml_type        dst_type);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                      s0, // stride dimension 0
+            int                      s1, // stride dimension 1
+            int                      p0, // padding dimension 0
+            int                      p1, // padding dimension 1
+            int                      d0, // dilation dimension 0
+            int                      d1, // dilation dimension 1
+            bool                     is_2D,
+            enum lm_ggml_type        dst_type);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_im2col_back(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // gradient of im2col output
+            int64_t                * ne, // shape of im2col input
+            int                      s0, // stride dimension 0
+            int                      s1, // stride dimension 1
+            int                      p0, // padding dimension 0
+            int                      p1, // padding dimension 1
+            int                      d0, // dilation dimension 0
+            int                      d1, // dilation dimension 1
+            bool                     is_2D);
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                      s0,
-            int                      s1,
-            int                      p0,
-            int                      p1,
-            int                      d0,
-            int                      d1);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                      s0, // stride dimension 0
+            int                      s1, // stride dimension 1
+            int                      p0, // padding dimension 0
+            int                      p1, // padding dimension 1
+            int                      d0, // dilation dimension 0
+            int                      d1); // dilation dimension 1
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
             int                      s0, // stride
             int                      p0, // padding
             int                      d0); // dilation
@@ -1602,29 +1675,29 @@ extern "C" {
     // alias for lm_ggml_conv_1d(a, b, s, a->ne[0]/2, d)
     LM_GGML_API struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                      s,
-            int                      d);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                      s,  // stride
+            int                      d); // dilation
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                      s0,
-            int                      p0,
-            int                      d0);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                      s0, // stride
+            int                      p0, // padding
+            int                      d0); // dilation
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d(
             struct lm_ggml_context * ctx,
-            struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b,
-            int                      s0,
-            int                      s1,
-            int                      p0,
-            int                      p1,
-            int                      d0,
-            int                      d1);
+            struct lm_ggml_tensor  * a,  // convolution kernel
+            struct lm_ggml_tensor  * b,  // data
+            int                      s0, // stride dimension 0
+            int                      s1, // stride dimension 1
+            int                      p0, // padding dimension 0
+            int                      p1, // padding dimension 1
+            int                      d0, // dilation dimension 0
+            int                      d1); // dilation dimension 1
 
 
     // kernel size is a->ne[0] x a->ne[1]
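The annotated signatures pin down the argument order: kernel first, data second, then stride/padding/dilation per dimension. A sketch of a stride-2 downsampling convolution using the documented order (all sizes and the wrapper name are illustrative):

```c
#include "ggml.h"

// Hypothetical wrapper: apply `kernel` to `input` with stride 2,
// padding 1, and no dilation in both dimensions.
static struct lm_ggml_tensor * downsample_conv(struct lm_ggml_context * ctx,
                                               struct lm_ggml_tensor  * kernel,
                                               struct lm_ggml_tensor  * input) {
    return lm_ggml_conv_2d(ctx, kernel, input,
                           /*s0=*/2, /*s1=*/2,   // stride
                           /*p0=*/1, /*p1=*/1,   // padding
                           /*d0=*/1, /*d1=*/1);  // dilation
}
```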
@@ -1686,6 +1759,18 @@ extern "C" {
             float                 p0,
             float                 p1);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_pool_2d_back(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a,
+            struct lm_ggml_tensor  * af, // "a"/input used in forward pass
+            enum lm_ggml_op_pool     op,
+            int                      k0,
+            int                      k1,
+            int                      s0,
+            int                      s1,
+            float                    p0,
+            float                    p1);
+
     // nearest interpolate
     // multiplies ne0 and ne1 by scale factor
     // used in stable-diffusion
@@ -1840,6 +1925,15 @@ extern "C" {
             struct lm_ggml_tensor  * pw,
             struct lm_ggml_tensor  * ph);
 
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_rwkv_wkv(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * k,
+            struct lm_ggml_tensor  * v,
+            struct lm_ggml_tensor  * r,
+            struct lm_ggml_tensor  * tf,
+            struct lm_ggml_tensor  * td,
+            struct lm_ggml_tensor  * state);
+
     // custom operators
 
     typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -2010,10 +2104,23 @@ extern "C" {
     LM_GGML_API size_t lm_ggml_graph_overhead(void);
     LM_GGML_API size_t lm_ggml_graph_overhead_custom(size_t size, bool grads);
 
+    LM_GGML_API struct lm_ggml_threadpool_params lm_ggml_threadpool_params_default(int n_threads);
+    LM_GGML_API void                             lm_ggml_threadpool_params_init   (struct lm_ggml_threadpool_params * p, int n_threads);
+    LM_GGML_API bool                             lm_ggml_threadpool_params_match  (const struct lm_ggml_threadpool_params * p0, const struct lm_ggml_threadpool_params * p1);
+    LM_GGML_API struct lm_ggml_threadpool *      lm_ggml_threadpool_new           (struct lm_ggml_threadpool_params * params);
+    LM_GGML_API void                             lm_ggml_threadpool_free          (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API int                              lm_ggml_threadpool_get_n_threads (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                             lm_ggml_threadpool_pause         (struct lm_ggml_threadpool * threadpool);
+    LM_GGML_API void                             lm_ggml_threadpool_resume       (struct lm_ggml_threadpool * threadpool);
+
     // lm_ggml_graph_plan() has to be called before lm_ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
-    LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan   (const struct lm_ggml_cgraph * cgraph, int n_threads /* = LM_GGML_DEFAULT_N_THREADS */);
-    LM_GGML_API enum lm_ggml_status  lm_ggml_graph_compute(struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
+    LM_GGML_API struct lm_ggml_cplan lm_ggml_graph_plan(
+                  const struct lm_ggml_cgraph * cgraph,
+                        int                     n_threads, /* = LM_GGML_DEFAULT_N_THREADS */
+                        struct lm_ggml_threadpool * threadpool /* = NULL */ );
+    LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute(struct lm_ggml_cgraph * cgraph, struct lm_ggml_cplan * cplan);
+
     // same as lm_ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
     LM_GGML_API enum lm_ggml_status lm_ggml_graph_compute_with_ctx(struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, int n_threads);
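Taken together with the `threadpool` field added to `lm_ggml_cplan`, the new API lets one pool serve repeated graph computations instead of spawning threads per call. A minimal sketch of the lifecycle, assuming a graph `gf` has already been built elsewhere (the driver name and thread count are illustrative; passing a NULL pool to `lm_ggml_graph_plan` keeps the old behavior):

```c
#include <stdlib.h>
#include "ggml.h"

// Hypothetical driver: run gf on a reusable 8-thread pool.
static enum lm_ggml_status run_with_pool(struct lm_ggml_cgraph * gf) {
    struct lm_ggml_threadpool_params tpp = lm_ggml_threadpool_params_default(8);
    struct lm_ggml_threadpool * tp = lm_ggml_threadpool_new(&tpp);

    // the plan records the pool; the caller still owns the work buffer
    struct lm_ggml_cplan plan = lm_ggml_graph_plan(gf, 8, tp);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size);
    }

    enum lm_ggml_status status = lm_ggml_graph_compute(gf, &plan);

    free(plan.work_data);
    lm_ggml_threadpool_free(tp);
    return status;
}
```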