llama_cpp 0.3.8 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -0
- data/README.md +1 -1
- data/examples/chat.rb +4 -6
- data/ext/llama_cpp/extconf.rb +3 -3
- data/ext/llama_cpp/llama_cpp.cpp +129 -124
- data/ext/llama_cpp/src/ggml-alloc.c +90 -113
- data/ext/llama_cpp/src/ggml-alloc.h +1 -1
- data/ext/llama_cpp/src/ggml-cuda.cu +350 -77
- data/ext/llama_cpp/src/ggml-cuda.h +13 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +226 -121
- data/ext/llama_cpp/src/ggml-metal.metal +157 -35
- data/ext/llama_cpp/src/ggml.c +2724 -584
- data/ext/llama_cpp/src/ggml.h +282 -31
- data/ext/llama_cpp/src/k_quants.c +112 -56
- data/ext/llama_cpp/src/llama.cpp +4857 -2986
- data/ext/llama_cpp/src/llama.h +180 -126
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -2
- data/sig/llama_cpp.rbs +12 -11
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -130,13 +130,16 @@
 // The data of the tensor is accessed via the "data" pointer. For example:
 //
 //   {
-//       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
+//       const int nx = 2;
+//       const int ny = 3;
 //
-//       // a[2, 1] = 1.0f;
-//       *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
+//       struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
 //
-//       // a[0, 2] = 2.0f;
-//       *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
+//       for (int y = 0; y < ny; y++) {
+//           for (int x = 0; x < nx; x++) {
+//               *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
+//           }
+//       }
 //
 //       ...
 //   }
@@ -207,14 +210,24 @@
 #define GGML_MAX_PARAMS 256
 #define GGML_MAX_CONTEXTS 64
 #define GGML_MAX_SRC 6
-#define GGML_MAX_NAME 48
+#define GGML_MAX_NAME 64
 #define GGML_MAX_OP_PARAMS 32
 #define GGML_DEFAULT_N_THREADS 4

+#if UINTPTR_MAX == 0xFFFFFFFF
+    #define GGML_MEM_ALIGN 4
+#else
+    #define GGML_MEM_ALIGN 16
+#endif

 #define GGML_EXIT_SUCCESS 0
 #define GGML_EXIT_ABORTED 1

+#define GGUF_MAGIC 0x46554747 // "GGUF"
+#define GGUF_VERSION 2
+
+#define GGUF_DEFAULT_ALIGNMENT 32
+
 #define GGML_UNUSED(x) (void)(x)

 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -255,8 +268,9 @@
 extern "C" {
 #endif

-#ifdef __ARM_NEON
-    // we use the built-in 16-bit float type
+#if defined(__ARM_NEON) && defined(__CUDACC__)
+    typedef half ggml_fp16_t;
+#elif defined(__ARM_NEON)
     typedef __fp16 ggml_fp16_t;
 #else
     typedef uint16_t ggml_fp16_t;
@@ -340,10 +354,12 @@ extern "C" {
         GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
+        GGML_OP_CONCAT,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
         GGML_OP_RMS_NORM_BACK,
+        GGML_OP_GROUP_NORM,

         GGML_OP_MUL_MAT,
         GGML_OP_OUT_PROD,
@@ -369,14 +385,19 @@ extern "C" {
         GGML_OP_CLAMP,
         GGML_OP_CONV_1D,
         GGML_OP_CONV_2D,
+        GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,

+        GGML_OP_UPSCALE, // nearest interpolate
+
         GGML_OP_FLASH_ATTN,
         GGML_OP_FLASH_FF,
         GGML_OP_FLASH_ATTN_BACK,
         GGML_OP_WIN_PART,
         GGML_OP_WIN_UNPART,
+        GGML_OP_GET_REL_POS,
+        GGML_OP_ADD_REL_POS,

         GGML_OP_UNARY,

@@ -458,6 +479,9 @@ extern "C" {
        int64_t perf_cycles;
        int64_t perf_time_us;

+        struct ggml_tensor * view_src;
+        size_t view_offs;
+
        void * data;

        char name[GGML_MAX_NAME];
@@ -562,6 +586,7 @@ extern "C" {
    GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
    GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
    GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+    GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
    GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);

    GGML_API int ggml_blck_size (enum ggml_type type);
@@ -639,7 +664,7 @@ extern "C" {
    GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);

    GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);

    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);

@@ -799,6 +824,13 @@ extern "C" {
            struct ggml_tensor * a,
            struct ggml_tensor * b);

+    // concat a and b on dim 2
+    // used in stable-diffusion
+    GGML_API struct ggml_tensor * ggml_concat(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
    GGML_API struct ggml_tensor * ggml_abs(
            struct ggml_context * ctx,
            struct ggml_tensor * a);
@@ -888,14 +920,15 @@ extern "C" {
            struct ggml_tensor * b);

    // normalize along rows
-    // TODO: eps is hardcoded to 1e-5 for now
    GGML_API struct ggml_tensor * ggml_norm(
            struct ggml_context * ctx,
-            struct ggml_tensor * a);
+            struct ggml_tensor * a,
+            float eps);

    GGML_API struct ggml_tensor * ggml_norm_inplace(
            struct ggml_context * ctx,
-            struct ggml_tensor * a);
+            struct ggml_tensor * a,
+            float eps);

    GGML_API struct ggml_tensor * ggml_rms_norm(
            struct ggml_context * ctx,
@@ -907,13 +940,26 @@ extern "C" {
            struct ggml_tensor * a,
            float eps);

+    // group normalize along ne0*ne1*n_groups
+    // used in stable-diffusion
+    // TODO: eps is hardcoded to 1e-6 for now
+    GGML_API struct ggml_tensor * ggml_group_norm(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int n_groups);
+
+    GGML_API struct ggml_tensor * ggml_group_norm_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int n_groups);
+
    // a - x
    // b - dy
-    // TODO: update with configurable eps
    GGML_API struct ggml_tensor * ggml_rms_norm_back(
            struct ggml_context * ctx,
            struct ggml_tensor * a,
-            struct ggml_tensor * b);
+            struct ggml_tensor * b,
+            float eps);

    // A: n columns, m rows
    // B: n columns, p rows (i.e. we transpose it internally)
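Migration note for callers of the bundled header: ggml_norm() and ggml_norm_inplace() now take an explicit epsilon, and ggml_rms_norm_back() gained one as well. The sketch below is not code shipped with the gem; it only illustrates an updated call site, assumes the usual ggml context setup from the rest of this header (ggml_init, ggml_new_tensor_1d, ggml_free), and passes 1e-5f simply because that is the value the old header said was hardcoded, not because the API requires it.

    #include "ggml.h"

    int main(void) {
        // small scratch context for building the graph
        struct ggml_init_params ip = { /*mem_size*/ 16*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        // 0.3.8: ggml_norm(ctx, x);  -- eps was hardcoded to 1e-5 inside ggml
        // 0.5.0: the epsilon is now an explicit argument
        struct ggml_tensor * cur = ggml_norm(ctx, x, 1e-5f);
        (void) cur;

        ggml_free(ctx);
        return 0;
    }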
@@ -1207,6 +1253,15 @@ extern "C" {
            float freq_base,
            float freq_scale);

+    // xPos RoPE, in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int n_past,
+            int n_dims,
+            float base,
+            bool down);
+
    // rotary position embedding backward, i.e compute dx from dy
    // a - dy
    GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1215,7 +1270,11 @@ extern "C" {
            int n_past,
            int n_dims,
            int mode,
-            int n_ctx);
+            int n_ctx,
+            float freq_base,
+            float freq_scale,
+            float xpos_base,
+            bool xpos_down);

    // alibi position embedding
    // in-place, returns view(a)
@@ -1242,6 +1301,15 @@ extern "C" {
            int p0, // padding
            int d0); // dilation

+    // conv_1d with padding = half
+    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            int s,
+            int d);
+
    GGML_API struct ggml_tensor * ggml_conv_2d(
            struct ggml_context * ctx,
            struct ggml_tensor * a,
@@ -1253,14 +1321,38 @@ extern "C" {
            int d0,
            int d1);

-    // conv_1d with padding = half
-    // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
-    GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is equal to kernel size
+    // padding is zero
+    // example:
+    // a: 16 16 3 768
+    // b: 1024 1024 3 1
+    // res: 64 64 768 1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    // kernel size is a->ne[0] x a->ne[1]
+    // stride is 1
+    // padding is half
+    // example:
+    // a: 3 3 256 256
+    // b: 64 64 256 1
+    // res: 64 64 256 1
+    // used in sam
+    GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
            struct ggml_context * ctx,
            struct ggml_tensor * a,
            struct ggml_tensor * b,
-            int s,
-            int d);
+            int stride);

    enum ggml_op_pool {
        GGML_OP_POOL_MAX,
@@ -1287,6 +1379,13 @@ extern "C" {
            int p0,
            int p1);

+    // nearest interpolate
+    // used in stable-diffusion
+    GGML_API struct ggml_tensor * ggml_upscale(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int scale_factor);
+
    GGML_API struct ggml_tensor * ggml_flash_attn(
            struct ggml_context * ctx,
            struct ggml_tensor * q,
@@ -1340,6 +1439,27 @@ extern "C" {
            struct ggml_tensor * a,
            enum ggml_unary_op op);

+    // used in sam
+    GGML_API struct ggml_tensor * ggml_get_rel_pos(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int qh,
+            int kh);
+
+    // used in sam
+
+    GGML_API struct ggml_tensor * ggml_add_rel_pos(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * pw,
+            struct ggml_tensor * ph);
+
+    GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * pw,
+            struct ggml_tensor * ph);
+
    // custom operators

    typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1495,7 +1615,8 @@ extern "C" {
            struct ggml_tensor * tensor);


-    GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
+    GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);

    GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
    GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
@@ -1560,6 +1681,8 @@ extern "C" {
        GGML_LINESEARCH_INVALID_PARAMETERS,
    };

+    typedef void (*ggml_opt_callback)(void * data, float * sched);
+
    // optimization parameters
    //
    // see ggml.c (ggml_opt_default_params) for default values
@@ -1595,12 +1718,14 @@ extern "C" {

            float sched; // schedule multiplier (fixed, decay or warmup)
            float decay; // weight decay for AdamW, use 0.0f to disable
+            int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
            float alpha; // learning rate
            float beta1;
            float beta2;
            float eps; // epsilon for numerical stability
            float eps_f; // epsilon for convergence test
            float eps_g; // epsilon for convergence test
+            float gclip; // gradient clipping
        } adam;

        // LBFGS parameters
@@ -1628,14 +1753,12 @@ extern "C" {

        bool just_initialized;

+        float loss_before;
+        float loss_after;
+
        struct {
-            struct ggml_tensor * x; // view of the parameters
-            struct ggml_tensor * g1; // gradient
-            struct ggml_tensor * g2; // gradient squared
            struct ggml_tensor * m; // first moment
            struct ggml_tensor * v; // second moment
-            struct ggml_tensor * mh; // first moment hat
-            struct ggml_tensor * vh; // second moment hat
            struct ggml_tensor * pf; // past function values
            float fx_best;
            float fx_prev;
@@ -1672,10 +1795,10 @@ extern "C" {

    // initialize optimizer context
    GGML_API void ggml_opt_init(
-            struct ggml_context * ctx,
+            struct ggml_context     * ctx,
            struct ggml_opt_context * opt,
-            struct ggml_opt_params params,
-            int64_t nx);
+            struct ggml_opt_params    params,
+            int64_t                   nx);

    // continue optimizing the function defined by the tensor f
    GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1689,7 +1812,9 @@ extern "C" {
            struct ggml_opt_context * opt,
            struct ggml_tensor * f,
            struct ggml_cgraph * gf,
-            struct ggml_cgraph * gb);
+            struct ggml_cgraph * gb,
+            ggml_opt_callback callback,
+            void * callback_data);

    //
    // quantization
@@ -1703,6 +1828,127 @@ extern "C" {

    GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);

+    //
+    // gguf
+    //
+
+    enum gguf_type {
+        GGUF_TYPE_UINT8 = 0,
+        GGUF_TYPE_INT8 = 1,
+        GGUF_TYPE_UINT16 = 2,
+        GGUF_TYPE_INT16 = 3,
+        GGUF_TYPE_UINT32 = 4,
+        GGUF_TYPE_INT32 = 5,
+        GGUF_TYPE_FLOAT32 = 6,
+        GGUF_TYPE_BOOL = 7,
+        GGUF_TYPE_STRING = 8,
+        GGUF_TYPE_ARRAY = 9,
+        GGUF_TYPE_UINT64 = 10,
+        GGUF_TYPE_INT64 = 11,
+        GGUF_TYPE_FLOAT64 = 12,
+        GGUF_TYPE_COUNT, // marks the end of the enum
+    };
+
+    struct gguf_context;
+
+    struct gguf_init_params {
+        bool no_alloc;
+
+        // if not NULL, create a ggml_context and allocate the tensor data in it
+        struct ggml_context ** ctx;
+    };
+
+    GGML_API struct gguf_context * gguf_init_empty(void);
+    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
+    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
+
+    GGML_API void gguf_free(struct gguf_context * ctx);
+
+    GGML_API const char * gguf_type_name(enum gguf_type type);
+
+    GGML_API int gguf_get_version (struct gguf_context * ctx);
+    GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
+    GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
+    GGML_API void * gguf_get_data (struct gguf_context * ctx);
+
+    GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
+    GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
+    GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
+
+    GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
+    GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
+
+    // results are undefined if the wrong type is used for the key
+    GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
+    GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
+    GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
+    GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
+    GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
+    GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
+    GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
+    GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
+    GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
+    GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
+    GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
+    GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
+    GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
+    GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
+    GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
+
+    GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
+    GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
+    GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
+    GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
+
+    // overrides existing values or adds a new one
+    GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
+    GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
+    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
+    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
+    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
+    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
+    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
+    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
+    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
+    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
+    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
+    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
+    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
+    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
+
+    // set or add KV pairs from another context
+    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
+
+    // manage tensor info
+    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
+    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
+    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
+
+    // writing gguf files can be done in 2 ways:
+    //
+    // - write the entire gguf_context to a binary file in a single pass:
+    //
+    //   gguf_write_to_file(ctx, fname);
+    //
+    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
+    //
+    //   FILE * f = fopen(fname, "wb");
+    //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
+    //   fwrite(f, ...);
+    //   void * data = gguf_meta_get_meta_data(ctx);
+    //   fseek(f, 0, SEEK_SET);
+    //   fwrite(f, data, gguf_get_meta_size(ctx));
+    //   free(data);
+    //   fclose(f);
+    //
+
+    // write the entire context to a binary file
+    GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
+
+    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
+    GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
+    GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
+
    //
    // system info
    //
@@ -1723,6 +1969,7 @@ extern "C" {
    GGML_API int ggml_cpu_has_clblast (void);
    GGML_API int ggml_cpu_has_gpublas (void);
    GGML_API int ggml_cpu_has_sse3 (void);
+    GGML_API int ggml_cpu_has_ssse3 (void);
    GGML_API int ggml_cpu_has_vsx (void);

    //
@@ -1740,6 +1987,10 @@ extern "C" {
    typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);

    typedef struct {
+        const char * type_name;
+        int blck_size;
+        size_t type_size;
+        bool is_quantized;
        ggml_to_float_t to_float;
        ggml_from_float_t from_float;
        ggml_from_float_t from_float_reference;
@@ -1747,7 +1998,7 @@ extern "C" {
        enum ggml_type vec_dot_type;
    } ggml_type_traits_t;

-    ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type i);
+    ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);

 #ifdef __cplusplus
 }
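For orientation, the GGUF functions introduced in this version of the header can be exercised as in the sketch below. This is not code shipped in the gem; it only uses declarations visible in the hunks above, and "model.gguf" is a placeholder path.

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // read metadata only: no ggml_context is requested here
        struct gguf_init_params params = { /*no_alloc*/ true, /*ctx*/ NULL };
        struct gguf_context * ctx = gguf_init_from_file("model.gguf", params);
        if (ctx == NULL) {
            fprintf(stderr, "failed to open gguf file\n");
            return 1;
        }

        printf("gguf version: %d\n", gguf_get_version(ctx));

        // key/value metadata
        for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
            printf("kv %d: %s (%s)\n", i,
                   gguf_get_key(ctx, i),
                   gguf_type_name(gguf_get_kv_type(ctx, i)));
        }

        // tensor info
        for (int i = 0; i < gguf_get_n_tensors(ctx); i++) {
            printf("tensor %d: %s at offset %zu\n", i,
                   gguf_get_tensor_name(ctx, i),
                   gguf_get_tensor_offset(ctx, i));
        }

        gguf_free(ctx);
        return 0;
    }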