llama_cpp 0.3.8 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +1 -1
- data/examples/chat.rb +4 -6
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +129 -124
- data/ext/llama_cpp/src/ggml-alloc.c +90 -113
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +350 -77
- data/ext/llama_cpp/src/ggml-cuda.h +13 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +226 -121
- data/ext/llama_cpp/src/ggml-metal.metal +157 -35
- data/ext/llama_cpp/src/ggml.c +2724 -584
- data/ext/llama_cpp/src/ggml.h +282 -31
- data/ext/llama_cpp/src/k_quants.c +112 -56
- data/ext/llama_cpp/src/llama.cpp +4857 -2986
- data/ext/llama_cpp/src/llama.h +180 -126
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -2
- data/sig/llama_cpp.rbs +12 -11
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -130,13 +130,16 @@
|
|
130
130
|
// The data of the tensor is accessed via the "data" pointer. For example:
|
131
131
|
//
|
132
132
|
// {
|
133
|
-
//
|
133
|
+
// const int nx = 2;
|
134
|
+
// const int ny = 3;
|
134
135
|
//
|
135
|
-
//
|
136
|
-
// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
|
136
|
+
// struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
|
137
137
|
//
|
138
|
-
//
|
139
|
-
//
|
138
|
+
// for (int y = 0; y < ny; y++) {
|
139
|
+
// for (int x = 0; x < nx; x++) {
|
140
|
+
// *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
|
141
|
+
// }
|
142
|
+
// }
|
140
143
|
//
|
141
144
|
// ...
|
142
145
|
// }
|
@@ -207,14 +210,24 @@
|
|
207
210
|
#define GGML_MAX_PARAMS 256
|
208
211
|
#define GGML_MAX_CONTEXTS 64
|
209
212
|
#define GGML_MAX_SRC 6
|
210
|
-
#define GGML_MAX_NAME
|
213
|
+
#define GGML_MAX_NAME 64
|
211
214
|
#define GGML_MAX_OP_PARAMS 32
|
212
215
|
#define GGML_DEFAULT_N_THREADS 4
|
213
216
|
|
217
|
+
#if UINTPTR_MAX == 0xFFFFFFFF
|
218
|
+
#define GGML_MEM_ALIGN 4
|
219
|
+
#else
|
220
|
+
#define GGML_MEM_ALIGN 16
|
221
|
+
#endif
|
214
222
|
|
215
223
|
#define GGML_EXIT_SUCCESS 0
|
216
224
|
#define GGML_EXIT_ABORTED 1
|
217
225
|
|
226
|
+
#define GGUF_MAGIC 0x46554747 // "GGUF"
|
227
|
+
#define GGUF_VERSION 2
|
228
|
+
|
229
|
+
#define GGUF_DEFAULT_ALIGNMENT 32
|
230
|
+
|
218
231
|
#define GGML_UNUSED(x) (void)(x)
|
219
232
|
|
220
233
|
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
|
@@ -255,8 +268,9 @@
|
|
255
268
|
extern "C" {
|
256
269
|
#endif
|
257
270
|
|
258
|
-
#
|
259
|
-
|
271
|
+
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
272
|
+
typedef half ggml_fp16_t;
|
273
|
+
#elif defined(__ARM_NEON)
|
260
274
|
typedef __fp16 ggml_fp16_t;
|
261
275
|
#else
|
262
276
|
typedef uint16_t ggml_fp16_t;
|
@@ -340,10 +354,12 @@ extern "C" {
|
|
340
354
|
GGML_OP_ARGMAX,
|
341
355
|
GGML_OP_REPEAT,
|
342
356
|
GGML_OP_REPEAT_BACK,
|
357
|
+
GGML_OP_CONCAT,
|
343
358
|
GGML_OP_SILU_BACK,
|
344
359
|
GGML_OP_NORM, // normalize
|
345
360
|
GGML_OP_RMS_NORM,
|
346
361
|
GGML_OP_RMS_NORM_BACK,
|
362
|
+
GGML_OP_GROUP_NORM,
|
347
363
|
|
348
364
|
GGML_OP_MUL_MAT,
|
349
365
|
GGML_OP_OUT_PROD,
|
@@ -369,14 +385,19 @@ extern "C" {
|
|
369
385
|
GGML_OP_CLAMP,
|
370
386
|
GGML_OP_CONV_1D,
|
371
387
|
GGML_OP_CONV_2D,
|
388
|
+
GGML_OP_CONV_TRANSPOSE_2D,
|
372
389
|
GGML_OP_POOL_1D,
|
373
390
|
GGML_OP_POOL_2D,
|
374
391
|
|
392
|
+
GGML_OP_UPSCALE, // nearest interpolate
|
393
|
+
|
375
394
|
GGML_OP_FLASH_ATTN,
|
376
395
|
GGML_OP_FLASH_FF,
|
377
396
|
GGML_OP_FLASH_ATTN_BACK,
|
378
397
|
GGML_OP_WIN_PART,
|
379
398
|
GGML_OP_WIN_UNPART,
|
399
|
+
GGML_OP_GET_REL_POS,
|
400
|
+
GGML_OP_ADD_REL_POS,
|
380
401
|
|
381
402
|
GGML_OP_UNARY,
|
382
403
|
|
@@ -458,6 +479,9 @@ extern "C" {
|
|
458
479
|
int64_t perf_cycles;
|
459
480
|
int64_t perf_time_us;
|
460
481
|
|
482
|
+
struct ggml_tensor * view_src;
|
483
|
+
size_t view_offs;
|
484
|
+
|
461
485
|
void * data;
|
462
486
|
|
463
487
|
char name[GGML_MAX_NAME];
|
@@ -562,6 +586,7 @@ extern "C" {
|
|
562
586
|
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
|
563
587
|
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
|
564
588
|
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
|
589
|
+
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
|
565
590
|
GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
|
566
591
|
|
567
592
|
GGML_API int ggml_blck_size (enum ggml_type type);
|
@@ -639,7 +664,7 @@ extern "C" {
|
|
639
664
|
GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
|
640
665
|
|
641
666
|
GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
|
642
|
-
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx,
|
667
|
+
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
643
668
|
|
644
669
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
645
670
|
|
@@ -799,6 +824,13 @@ extern "C" {
|
|
799
824
|
struct ggml_tensor * a,
|
800
825
|
struct ggml_tensor * b);
|
801
826
|
|
827
|
+
// concat a and b on dim 2
|
828
|
+
// used in stable-diffusion
|
829
|
+
GGML_API struct ggml_tensor * ggml_concat(
|
830
|
+
struct ggml_context * ctx,
|
831
|
+
struct ggml_tensor * a,
|
832
|
+
struct ggml_tensor * b);
|
833
|
+
|
802
834
|
GGML_API struct ggml_tensor * ggml_abs(
|
803
835
|
struct ggml_context * ctx,
|
804
836
|
struct ggml_tensor * a);
|
@@ -888,14 +920,15 @@ extern "C" {
|
|
888
920
|
struct ggml_tensor * b);
|
889
921
|
|
890
922
|
// normalize along rows
|
891
|
-
// TODO: eps is hardcoded to 1e-5 for now
|
892
923
|
GGML_API struct ggml_tensor * ggml_norm(
|
893
924
|
struct ggml_context * ctx,
|
894
|
-
struct ggml_tensor * a
|
925
|
+
struct ggml_tensor * a,
|
926
|
+
float eps);
|
895
927
|
|
896
928
|
GGML_API struct ggml_tensor * ggml_norm_inplace(
|
897
929
|
struct ggml_context * ctx,
|
898
|
-
struct ggml_tensor * a
|
930
|
+
struct ggml_tensor * a,
|
931
|
+
float eps);
|
899
932
|
|
900
933
|
GGML_API struct ggml_tensor * ggml_rms_norm(
|
901
934
|
struct ggml_context * ctx,
|
@@ -907,13 +940,26 @@ extern "C" {
|
|
907
940
|
struct ggml_tensor * a,
|
908
941
|
float eps);
|
909
942
|
|
943
|
+
// group normalize along ne0*ne1*n_groups
|
944
|
+
// used in stable-diffusion
|
945
|
+
// TODO: eps is hardcoded to 1e-6 for now
|
946
|
+
GGML_API struct ggml_tensor * ggml_group_norm(
|
947
|
+
struct ggml_context * ctx,
|
948
|
+
struct ggml_tensor * a,
|
949
|
+
int n_groups);
|
950
|
+
|
951
|
+
GGML_API struct ggml_tensor * ggml_group_norm_inplace(
|
952
|
+
struct ggml_context * ctx,
|
953
|
+
struct ggml_tensor * a,
|
954
|
+
int n_groups);
|
955
|
+
|
910
956
|
// a - x
|
911
957
|
// b - dy
|
912
|
-
// TODO: update with configurable eps
|
913
958
|
GGML_API struct ggml_tensor * ggml_rms_norm_back(
|
914
959
|
struct ggml_context * ctx,
|
915
960
|
struct ggml_tensor * a,
|
916
|
-
struct ggml_tensor * b
|
961
|
+
struct ggml_tensor * b,
|
962
|
+
float eps);
|
917
963
|
|
918
964
|
// A: n columns, m rows
|
919
965
|
// B: n columns, p rows (i.e. we transpose it internally)
|
@@ -1207,6 +1253,15 @@ extern "C" {
|
|
1207
1253
|
float freq_base,
|
1208
1254
|
float freq_scale);
|
1209
1255
|
|
1256
|
+
// xPos RoPE, in-place, returns view(a)
|
1257
|
+
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
|
1258
|
+
struct ggml_context * ctx,
|
1259
|
+
struct ggml_tensor * a,
|
1260
|
+
int n_past,
|
1261
|
+
int n_dims,
|
1262
|
+
float base,
|
1263
|
+
bool down);
|
1264
|
+
|
1210
1265
|
// rotary position embedding backward, i.e. compute dx from dy
|
1211
1266
|
// a - dy
|
1212
1267
|
GGML_API struct ggml_tensor * ggml_rope_back(
|
@@ -1215,7 +1270,11 @@ extern "C" {
|
|
1215
1270
|
int n_past,
|
1216
1271
|
int n_dims,
|
1217
1272
|
int mode,
|
1218
|
-
int n_ctx
|
1273
|
+
int n_ctx,
|
1274
|
+
float freq_base,
|
1275
|
+
float freq_scale,
|
1276
|
+
float xpos_base,
|
1277
|
+
bool xpos_down);
|
1219
1278
|
|
1220
1279
|
// alibi position embedding
|
1221
1280
|
// in-place, returns view(a)
|
@@ -1242,6 +1301,15 @@ extern "C" {
|
|
1242
1301
|
int p0, // padding
|
1243
1302
|
int d0); // dilation
|
1244
1303
|
|
1304
|
+
// conv_1d with padding = half
|
1305
|
+
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
1306
|
+
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
1307
|
+
struct ggml_context * ctx,
|
1308
|
+
struct ggml_tensor * a,
|
1309
|
+
struct ggml_tensor * b,
|
1310
|
+
int s,
|
1311
|
+
int d);
|
1312
|
+
|
1245
1313
|
GGML_API struct ggml_tensor * ggml_conv_2d(
|
1246
1314
|
struct ggml_context * ctx,
|
1247
1315
|
struct ggml_tensor * a,
|
@@ -1253,14 +1321,38 @@ extern "C" {
|
|
1253
1321
|
int d0,
|
1254
1322
|
int d1);
|
1255
1323
|
|
1256
|
-
|
1257
|
-
//
|
1258
|
-
|
1324
|
+
|
1325
|
+
// kernel size is a->ne[0] x a->ne[1]
|
1326
|
+
// stride is equal to kernel size
|
1327
|
+
// padding is zero
|
1328
|
+
// example:
|
1329
|
+
// a: 16 16 3 768
|
1330
|
+
// b: 1024 1024 3 1
|
1331
|
+
// res: 64 64 768 1
|
1332
|
+
// used in sam
|
1333
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
|
1334
|
+
struct ggml_context * ctx,
|
1335
|
+
struct ggml_tensor * a,
|
1336
|
+
struct ggml_tensor * b);
|
1337
|
+
|
1338
|
+
// kernel size is a->ne[0] x a->ne[1]
|
1339
|
+
// stride is 1
|
1340
|
+
// padding is half
|
1341
|
+
// example:
|
1342
|
+
// a: 3 3 256 256
|
1343
|
+
// b: 64 64 256 1
|
1344
|
+
// res: 64 64 256 1
|
1345
|
+
// used in sam
|
1346
|
+
GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
|
1347
|
+
struct ggml_context * ctx,
|
1348
|
+
struct ggml_tensor * a,
|
1349
|
+
struct ggml_tensor * b);
|
1350
|
+
|
1351
|
+
GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
|
1259
1352
|
struct ggml_context * ctx,
|
1260
1353
|
struct ggml_tensor * a,
|
1261
1354
|
struct ggml_tensor * b,
|
1262
|
-
int
|
1263
|
-
int d);
|
1355
|
+
int stride);
|
1264
1356
|
|
1265
1357
|
enum ggml_op_pool {
|
1266
1358
|
GGML_OP_POOL_MAX,
|
@@ -1287,6 +1379,13 @@ extern "C" {
|
|
1287
1379
|
int p0,
|
1288
1380
|
int p1);
|
1289
1381
|
|
1382
|
+
// nearest interpolate
|
1383
|
+
// used in stable-diffusion
|
1384
|
+
GGML_API struct ggml_tensor * ggml_upscale(
|
1385
|
+
struct ggml_context * ctx,
|
1386
|
+
struct ggml_tensor * a,
|
1387
|
+
int scale_factor);
|
1388
|
+
|
1290
1389
|
GGML_API struct ggml_tensor * ggml_flash_attn(
|
1291
1390
|
struct ggml_context * ctx,
|
1292
1391
|
struct ggml_tensor * q,
|
@@ -1340,6 +1439,27 @@ extern "C" {
|
|
1340
1439
|
struct ggml_tensor * a,
|
1341
1440
|
enum ggml_unary_op op);
|
1342
1441
|
|
1442
|
+
// used in sam
|
1443
|
+
GGML_API struct ggml_tensor * ggml_get_rel_pos(
|
1444
|
+
struct ggml_context * ctx,
|
1445
|
+
struct ggml_tensor * a,
|
1446
|
+
int qh,
|
1447
|
+
int kh);
|
1448
|
+
|
1449
|
+
// used in sam
|
1450
|
+
|
1451
|
+
GGML_API struct ggml_tensor * ggml_add_rel_pos(
|
1452
|
+
struct ggml_context * ctx,
|
1453
|
+
struct ggml_tensor * a,
|
1454
|
+
struct ggml_tensor * pw,
|
1455
|
+
struct ggml_tensor * ph);
|
1456
|
+
|
1457
|
+
GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
|
1458
|
+
struct ggml_context * ctx,
|
1459
|
+
struct ggml_tensor * a,
|
1460
|
+
struct ggml_tensor * pw,
|
1461
|
+
struct ggml_tensor * ph);
|
1462
|
+
|
1343
1463
|
// custom operators
|
1344
1464
|
|
1345
1465
|
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
@@ -1495,7 +1615,8 @@ extern "C" {
|
|
1495
1615
|
struct ggml_tensor * tensor);
|
1496
1616
|
|
1497
1617
|
|
1498
|
-
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
1618
|
+
GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
1619
|
+
GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
|
1499
1620
|
|
1500
1621
|
GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
|
1501
1622
|
GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
|
@@ -1560,6 +1681,8 @@ extern "C" {
|
|
1560
1681
|
GGML_LINESEARCH_INVALID_PARAMETERS,
|
1561
1682
|
};
|
1562
1683
|
|
1684
|
+
typedef void (*ggml_opt_callback)(void * data, float * sched);
|
1685
|
+
|
1563
1686
|
// optimization parameters
|
1564
1687
|
//
|
1565
1688
|
// see ggml.c (ggml_opt_default_params) for default values
|
@@ -1595,12 +1718,14 @@ extern "C" {
|
|
1595
1718
|
|
1596
1719
|
float sched; // schedule multiplier (fixed, decay or warmup)
|
1597
1720
|
float decay; // weight decay for AdamW, use 0.0f to disable
|
1721
|
+
int decay_min_ndim; // minimum number of tensor dimensions to apply weight decay
|
1598
1722
|
float alpha; // learning rate
|
1599
1723
|
float beta1;
|
1600
1724
|
float beta2;
|
1601
1725
|
float eps; // epsilon for numerical stability
|
1602
1726
|
float eps_f; // epsilon for convergence test
|
1603
1727
|
float eps_g; // epsilon for convergence test
|
1728
|
+
float gclip; // gradient clipping
|
1604
1729
|
} adam;
|
1605
1730
|
|
1606
1731
|
// LBFGS parameters
|
@@ -1628,14 +1753,12 @@ extern "C" {
|
|
1628
1753
|
|
1629
1754
|
bool just_initialized;
|
1630
1755
|
|
1756
|
+
float loss_before;
|
1757
|
+
float loss_after;
|
1758
|
+
|
1631
1759
|
struct {
|
1632
|
-
struct ggml_tensor * x; // view of the parameters
|
1633
|
-
struct ggml_tensor * g1; // gradient
|
1634
|
-
struct ggml_tensor * g2; // gradient squared
|
1635
1760
|
struct ggml_tensor * m; // first moment
|
1636
1761
|
struct ggml_tensor * v; // second moment
|
1637
|
-
struct ggml_tensor * mh; // first moment hat
|
1638
|
-
struct ggml_tensor * vh; // second moment hat
|
1639
1762
|
struct ggml_tensor * pf; // past function values
|
1640
1763
|
float fx_best;
|
1641
1764
|
float fx_prev;
|
@@ -1672,10 +1795,10 @@ extern "C" {
|
|
1672
1795
|
|
1673
1796
|
// initialize optimizer context
|
1674
1797
|
GGML_API void ggml_opt_init(
|
1675
|
-
struct ggml_context
|
1798
|
+
struct ggml_context * ctx,
|
1676
1799
|
struct ggml_opt_context * opt,
|
1677
|
-
struct ggml_opt_params
|
1678
|
-
int64_t
|
1800
|
+
struct ggml_opt_params params,
|
1801
|
+
int64_t nx);
|
1679
1802
|
|
1680
1803
|
// continue optimizing the function defined by the tensor f
|
1681
1804
|
GGML_API enum ggml_opt_result ggml_opt_resume(
|
@@ -1689,7 +1812,9 @@ extern "C" {
|
|
1689
1812
|
struct ggml_opt_context * opt,
|
1690
1813
|
struct ggml_tensor * f,
|
1691
1814
|
struct ggml_cgraph * gf,
|
1692
|
-
struct ggml_cgraph * gb
|
1815
|
+
struct ggml_cgraph * gb,
|
1816
|
+
ggml_opt_callback callback,
|
1817
|
+
void * callback_data);
|
1693
1818
|
|
1694
1819
|
//
|
1695
1820
|
// quantization
|
@@ -1703,6 +1828,127 @@ extern "C" {
|
|
1703
1828
|
|
1704
1829
|
GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
|
1705
1830
|
|
1831
|
+
//
|
1832
|
+
// gguf
|
1833
|
+
//
|
1834
|
+
|
1835
|
+
enum gguf_type {
|
1836
|
+
GGUF_TYPE_UINT8 = 0,
|
1837
|
+
GGUF_TYPE_INT8 = 1,
|
1838
|
+
GGUF_TYPE_UINT16 = 2,
|
1839
|
+
GGUF_TYPE_INT16 = 3,
|
1840
|
+
GGUF_TYPE_UINT32 = 4,
|
1841
|
+
GGUF_TYPE_INT32 = 5,
|
1842
|
+
GGUF_TYPE_FLOAT32 = 6,
|
1843
|
+
GGUF_TYPE_BOOL = 7,
|
1844
|
+
GGUF_TYPE_STRING = 8,
|
1845
|
+
GGUF_TYPE_ARRAY = 9,
|
1846
|
+
GGUF_TYPE_UINT64 = 10,
|
1847
|
+
GGUF_TYPE_INT64 = 11,
|
1848
|
+
GGUF_TYPE_FLOAT64 = 12,
|
1849
|
+
GGUF_TYPE_COUNT, // marks the end of the enum
|
1850
|
+
};
|
1851
|
+
|
1852
|
+
struct gguf_context;
|
1853
|
+
|
1854
|
+
struct gguf_init_params {
|
1855
|
+
bool no_alloc;
|
1856
|
+
|
1857
|
+
// if not NULL, create a ggml_context and allocate the tensor data in it
|
1858
|
+
struct ggml_context ** ctx;
|
1859
|
+
};
|
1860
|
+
|
1861
|
+
GGML_API struct gguf_context * gguf_init_empty(void);
|
1862
|
+
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
|
1863
|
+
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
|
1864
|
+
|
1865
|
+
GGML_API void gguf_free(struct gguf_context * ctx);
|
1866
|
+
|
1867
|
+
GGML_API const char * gguf_type_name(enum gguf_type type);
|
1868
|
+
|
1869
|
+
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
1870
|
+
GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
|
1871
|
+
GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
|
1872
|
+
GGML_API void * gguf_get_data (struct gguf_context * ctx);
|
1873
|
+
|
1874
|
+
GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
|
1875
|
+
GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
|
1876
|
+
GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
|
1877
|
+
|
1878
|
+
GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
|
1879
|
+
GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
|
1880
|
+
|
1881
|
+
// results are undefined if the wrong type is used for the key
|
1882
|
+
GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
|
1883
|
+
GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
|
1884
|
+
GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
|
1885
|
+
GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
|
1886
|
+
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
1887
|
+
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
1888
|
+
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
1889
|
+
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
|
1890
|
+
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
|
1891
|
+
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
|
1892
|
+
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
1893
|
+
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
1894
|
+
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
1895
|
+
GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
|
1896
|
+
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
1897
|
+
|
1898
|
+
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
1899
|
+
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
1900
|
+
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
1901
|
+
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
1902
|
+
|
1903
|
+
// overrides existing values or adds a new one
|
1904
|
+
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
1905
|
+
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
|
1906
|
+
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
|
1907
|
+
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
|
1908
|
+
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
|
1909
|
+
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
|
1910
|
+
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
|
1911
|
+
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
|
1912
|
+
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
|
1913
|
+
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
|
1914
|
+
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
|
1915
|
+
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
|
1916
|
+
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
|
1917
|
+
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
|
1918
|
+
|
1919
|
+
// set or add KV pairs from another context
|
1920
|
+
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
|
1921
|
+
|
1922
|
+
// manage tensor info
|
1923
|
+
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
|
1924
|
+
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
|
1925
|
+
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
|
1926
|
+
|
1927
|
+
// writing gguf files can be done in 2 ways:
|
1928
|
+
//
|
1929
|
+
// - write the entire gguf_context to a binary file in a single pass:
|
1930
|
+
//
|
1931
|
+
// gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
|
1932
|
+
//
|
1933
|
+
// - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
|
1934
|
+
//
|
1935
|
+
// FILE * f = fopen(fname, "wb");
|
1936
|
+
// fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
|
1937
|
+
// fwrite(..., f); // write the tensor data
|
1938
|
+
// void * data = malloc(gguf_get_meta_size(ctx)); gguf_get_meta_data(ctx, data);
|
1939
|
+
// fseek(f, 0, SEEK_SET);
|
1940
|
+
// fwrite(data, 1, gguf_get_meta_size(ctx), f);
|
1941
|
+
// free(data);
|
1942
|
+
// fclose(f);
|
1943
|
+
//
|
1944
|
+
|
1945
|
+
// write the entire context to a binary file
|
1946
|
+
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
|
1947
|
+
|
1948
|
+
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
1949
|
+
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
|
1950
|
+
GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
|
1951
|
+
|
1706
1952
|
//
|
1707
1953
|
// system info
|
1708
1954
|
//
|
@@ -1723,6 +1969,7 @@ extern "C" {
|
|
1723
1969
|
GGML_API int ggml_cpu_has_clblast (void);
|
1724
1970
|
GGML_API int ggml_cpu_has_gpublas (void);
|
1725
1971
|
GGML_API int ggml_cpu_has_sse3 (void);
|
1972
|
+
GGML_API int ggml_cpu_has_ssse3 (void);
|
1726
1973
|
GGML_API int ggml_cpu_has_vsx (void);
|
1727
1974
|
|
1728
1975
|
//
|
@@ -1740,6 +1987,10 @@ extern "C" {
|
|
1740
1987
|
typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
|
1741
1988
|
|
1742
1989
|
typedef struct {
|
1990
|
+
const char * type_name;
|
1991
|
+
int blck_size;
|
1992
|
+
size_t type_size;
|
1993
|
+
bool is_quantized;
|
1743
1994
|
ggml_to_float_t to_float;
|
1744
1995
|
ggml_from_float_t from_float;
|
1745
1996
|
ggml_from_float_t from_float_reference;
|
@@ -1747,7 +1998,7 @@ extern "C" {
|
|
1747
1998
|
enum ggml_type vec_dot_type;
|
1748
1999
|
} ggml_type_traits_t;
|
1749
2000
|
|
1750
|
-
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type
|
2001
|
+
ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
|
1751
2002
|
|
1752
2003
|
#ifdef __cplusplus
|
1753
2004
|
}
|