llama_cpp 0.3.8 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -130,13 +130,16 @@
130
130
  // The data of the tensor is accessed via the "data" pointer. For example:
131
131
  //
132
132
  // {
133
- // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
133
+ // const int nx = 2;
134
+ // const int ny = 3;
134
135
  //
135
- // // a[2, 1] = 1.0f;
136
- // *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
136
+ // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
137
137
  //
138
- // // a[0, 2] = 2.0f;
139
- // *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
138
+ // for (int y = 0; y < ny; y++) {
139
+ // for (int x = 0; x < nx; x++) {
140
+ // *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
141
+ // }
142
+ // }
140
143
  //
141
144
  // ...
142
145
  // }
@@ -207,14 +210,24 @@
207
210
  #define GGML_MAX_PARAMS 256
208
211
  #define GGML_MAX_CONTEXTS 64
209
212
  #define GGML_MAX_SRC 6
210
- #define GGML_MAX_NAME 48
213
+ #define GGML_MAX_NAME 64
211
214
  #define GGML_MAX_OP_PARAMS 32
212
215
  #define GGML_DEFAULT_N_THREADS 4
213
216
 
217
+ #if UINTPTR_MAX == 0xFFFFFFFF
218
+ #define GGML_MEM_ALIGN 4
219
+ #else
220
+ #define GGML_MEM_ALIGN 16
221
+ #endif
214
222
 
215
223
  #define GGML_EXIT_SUCCESS 0
216
224
  #define GGML_EXIT_ABORTED 1
217
225
 
226
+ #define GGUF_MAGIC 0x46554747 // "GGUF"
227
+ #define GGUF_VERSION 2
228
+
229
+ #define GGUF_DEFAULT_ALIGNMENT 32
230
+
218
231
  #define GGML_UNUSED(x) (void)(x)
219
232
 
220
233
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -255,8 +268,9 @@
255
268
  extern "C" {
256
269
  #endif
257
270
 
258
- #ifdef __ARM_NEON
259
- // we use the built-in 16-bit float type
271
+ #if defined(__ARM_NEON) && defined(__CUDACC__)
272
+ typedef half ggml_fp16_t;
273
+ #elif defined(__ARM_NEON)
260
274
  typedef __fp16 ggml_fp16_t;
261
275
  #else
262
276
  typedef uint16_t ggml_fp16_t;
@@ -340,10 +354,12 @@ extern "C" {
340
354
  GGML_OP_ARGMAX,
341
355
  GGML_OP_REPEAT,
342
356
  GGML_OP_REPEAT_BACK,
357
+ GGML_OP_CONCAT,
343
358
  GGML_OP_SILU_BACK,
344
359
  GGML_OP_NORM, // normalize
345
360
  GGML_OP_RMS_NORM,
346
361
  GGML_OP_RMS_NORM_BACK,
362
+ GGML_OP_GROUP_NORM,
347
363
 
348
364
  GGML_OP_MUL_MAT,
349
365
  GGML_OP_OUT_PROD,
@@ -369,14 +385,19 @@ extern "C" {
369
385
  GGML_OP_CLAMP,
370
386
  GGML_OP_CONV_1D,
371
387
  GGML_OP_CONV_2D,
388
+ GGML_OP_CONV_TRANSPOSE_2D,
372
389
  GGML_OP_POOL_1D,
373
390
  GGML_OP_POOL_2D,
374
391
 
392
+ GGML_OP_UPSCALE, // nearest interpolate
393
+
375
394
  GGML_OP_FLASH_ATTN,
376
395
  GGML_OP_FLASH_FF,
377
396
  GGML_OP_FLASH_ATTN_BACK,
378
397
  GGML_OP_WIN_PART,
379
398
  GGML_OP_WIN_UNPART,
399
+ GGML_OP_GET_REL_POS,
400
+ GGML_OP_ADD_REL_POS,
380
401
 
381
402
  GGML_OP_UNARY,
382
403
 
@@ -458,6 +479,9 @@ extern "C" {
458
479
  int64_t perf_cycles;
459
480
  int64_t perf_time_us;
460
481
 
482
+ struct ggml_tensor * view_src;
483
+ size_t view_offs;
484
+
461
485
  void * data;
462
486
 
463
487
  char name[GGML_MAX_NAME];
@@ -562,6 +586,7 @@ extern "C" {
562
586
  GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
563
587
  GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
564
588
  GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
589
+ GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
565
590
  GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
566
591
 
567
592
  GGML_API int ggml_blck_size (enum ggml_type type);
@@ -639,7 +664,7 @@ extern "C" {
639
664
  GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
640
665
 
641
666
  GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
642
- GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
667
+ GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
643
668
 
644
669
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
645
670
 
@@ -799,6 +824,13 @@ extern "C" {
799
824
  struct ggml_tensor * a,
800
825
  struct ggml_tensor * b);
801
826
 
827
+ // concat a and b on dim 2
828
+ // used in stable-diffusion
829
+ GGML_API struct ggml_tensor * ggml_concat(
830
+ struct ggml_context * ctx,
831
+ struct ggml_tensor * a,
832
+ struct ggml_tensor * b);
833
+
802
834
  GGML_API struct ggml_tensor * ggml_abs(
803
835
  struct ggml_context * ctx,
804
836
  struct ggml_tensor * a);
@@ -888,14 +920,15 @@ extern "C" {
888
920
  struct ggml_tensor * b);
889
921
 
890
922
  // normalize along rows
891
- // TODO: eps is hardcoded to 1e-5 for now
892
923
  GGML_API struct ggml_tensor * ggml_norm(
893
924
  struct ggml_context * ctx,
894
- struct ggml_tensor * a);
925
+ struct ggml_tensor * a,
926
+ float eps);
895
927
 
896
928
  GGML_API struct ggml_tensor * ggml_norm_inplace(
897
929
  struct ggml_context * ctx,
898
- struct ggml_tensor * a);
930
+ struct ggml_tensor * a,
931
+ float eps);
899
932
 
900
933
  GGML_API struct ggml_tensor * ggml_rms_norm(
901
934
  struct ggml_context * ctx,
@@ -907,13 +940,26 @@ extern "C" {
907
940
  struct ggml_tensor * a,
908
941
  float eps);
909
942
 
943
+ // group normalize along ne0*ne1*n_groups
944
+ // used in stable-diffusion
945
+ // TODO: eps is hardcoded to 1e-6 for now
946
+ GGML_API struct ggml_tensor * ggml_group_norm(
947
+ struct ggml_context * ctx,
948
+ struct ggml_tensor * a,
949
+ int n_groups);
950
+
951
+ GGML_API struct ggml_tensor * ggml_group_norm_inplace(
952
+ struct ggml_context * ctx,
953
+ struct ggml_tensor * a,
954
+ int n_groups);
955
+
910
956
  // a - x
911
957
  // b - dy
912
- // TODO: update with configurable eps
913
958
  GGML_API struct ggml_tensor * ggml_rms_norm_back(
914
959
  struct ggml_context * ctx,
915
960
  struct ggml_tensor * a,
916
- struct ggml_tensor * b);
961
+ struct ggml_tensor * b,
962
+ float eps);
917
963
 
918
964
  // A: n columns, m rows
919
965
  // B: n columns, p rows (i.e. we transpose it internally)
@@ -1207,6 +1253,15 @@ extern "C" {
1207
1253
  float freq_base,
1208
1254
  float freq_scale);
1209
1255
 
1256
+ // xPos RoPE, in-place, returns view(a)
1257
+ GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
1258
+ struct ggml_context * ctx,
1259
+ struct ggml_tensor * a,
1260
+ int n_past,
1261
+ int n_dims,
1262
+ float base,
1263
+ bool down);
1264
+
1210
1265
  // rotary position embedding backward, i.e. compute dx from dy
1211
1266
  // a - dy
1212
1267
  GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1215,7 +1270,11 @@ extern "C" {
1215
1270
  int n_past,
1216
1271
  int n_dims,
1217
1272
  int mode,
1218
- int n_ctx);
1273
+ int n_ctx,
1274
+ float freq_base,
1275
+ float freq_scale,
1276
+ float xpos_base,
1277
+ bool xpos_down);
1219
1278
 
1220
1279
  // alibi position embedding
1221
1280
  // in-place, returns view(a)
@@ -1242,6 +1301,15 @@ extern "C" {
1242
1301
  int p0, // padding
1243
1302
  int d0); // dilation
1244
1303
 
1304
+ // conv_1d with padding = half
1305
+ // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1306
+ GGML_API struct ggml_tensor* ggml_conv_1d_ph(
1307
+ struct ggml_context * ctx,
1308
+ struct ggml_tensor * a,
1309
+ struct ggml_tensor * b,
1310
+ int s,
1311
+ int d);
1312
+
1245
1313
  GGML_API struct ggml_tensor * ggml_conv_2d(
1246
1314
  struct ggml_context * ctx,
1247
1315
  struct ggml_tensor * a,
@@ -1253,14 +1321,38 @@ extern "C" {
1253
1321
  int d0,
1254
1322
  int d1);
1255
1323
 
1256
- // conv_1d with padding = half
1257
- // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1258
- GGML_API struct ggml_tensor * ggml_conv_1d_ph(
1324
+
1325
+ // kernel size is a->ne[0] x a->ne[1]
1326
+ // stride is equal to kernel size
1327
+ // padding is zero
1328
+ // example:
1329
+ // a: 16 16 3 768
1330
+ // b: 1024 1024 3 1
1331
+ // res: 64 64 768 1
1332
+ // used in sam
1333
+ GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
1334
+ struct ggml_context * ctx,
1335
+ struct ggml_tensor * a,
1336
+ struct ggml_tensor * b);
1337
+
1338
+ // kernel size is a->ne[0] x a->ne[1]
1339
+ // stride is 1
1340
+ // padding is half
1341
+ // example:
1342
+ // a: 3 3 256 256
1343
+ // b: 64 64 256 1
1344
+ // res: 64 64 256 1
1345
+ // used in sam
1346
+ GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
1347
+ struct ggml_context * ctx,
1348
+ struct ggml_tensor * a,
1349
+ struct ggml_tensor * b);
1350
+
1351
+ GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
1259
1352
  struct ggml_context * ctx,
1260
1353
  struct ggml_tensor * a,
1261
1354
  struct ggml_tensor * b,
1262
- int s,
1263
- int d);
1355
+ int stride);
1264
1356
 
1265
1357
  enum ggml_op_pool {
1266
1358
  GGML_OP_POOL_MAX,
@@ -1287,6 +1379,13 @@ extern "C" {
1287
1379
  int p0,
1288
1380
  int p1);
1289
1381
 
1382
+ // nearest interpolate
1383
+ // used in stable-diffusion
1384
+ GGML_API struct ggml_tensor * ggml_upscale(
1385
+ struct ggml_context * ctx,
1386
+ struct ggml_tensor * a,
1387
+ int scale_factor);
1388
+
1290
1389
  GGML_API struct ggml_tensor * ggml_flash_attn(
1291
1390
  struct ggml_context * ctx,
1292
1391
  struct ggml_tensor * q,
@@ -1340,6 +1439,27 @@ extern "C" {
1340
1439
  struct ggml_tensor * a,
1341
1440
  enum ggml_unary_op op);
1342
1441
 
1442
+ // used in sam
1443
+ GGML_API struct ggml_tensor * ggml_get_rel_pos(
1444
+ struct ggml_context * ctx,
1445
+ struct ggml_tensor * a,
1446
+ int qh,
1447
+ int kh);
1448
+
1449
+ // used in sam
1450
+
1451
+ GGML_API struct ggml_tensor * ggml_add_rel_pos(
1452
+ struct ggml_context * ctx,
1453
+ struct ggml_tensor * a,
1454
+ struct ggml_tensor * pw,
1455
+ struct ggml_tensor * ph);
1456
+
1457
+ GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
1458
+ struct ggml_context * ctx,
1459
+ struct ggml_tensor * a,
1460
+ struct ggml_tensor * pw,
1461
+ struct ggml_tensor * ph);
1462
+
1343
1463
  // custom operators
1344
1464
 
1345
1465
  typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1495,7 +1615,8 @@ extern "C" {
1495
1615
  struct ggml_tensor * tensor);
1496
1616
 
1497
1617
 
1498
- GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1618
+ GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1619
+ GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
1499
1620
 
1500
1621
  GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
1501
1622
  GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
@@ -1560,6 +1681,8 @@ extern "C" {
1560
1681
  GGML_LINESEARCH_INVALID_PARAMETERS,
1561
1682
  };
1562
1683
 
1684
+ typedef void (*ggml_opt_callback)(void * data, float * sched);
1685
+
1563
1686
  // optimization parameters
1564
1687
  //
1565
1688
  // see ggml.c (ggml_opt_default_params) for default values
@@ -1595,12 +1718,14 @@ extern "C" {
1595
1718
 
1596
1719
  float sched; // schedule multiplier (fixed, decay or warmup)
1597
1720
  float decay; // weight decay for AdamW, use 0.0f to disable
1721
+ int decay_min_ndim; // minimum number of tensor dimensions required for weight decay to be applied
1598
1722
  float alpha; // learning rate
1599
1723
  float beta1;
1600
1724
  float beta2;
1601
1725
  float eps; // epsilon for numerical stability
1602
1726
  float eps_f; // epsilon for convergence test
1603
1727
  float eps_g; // epsilon for convergence test
1728
+ float gclip; // gradient clipping
1604
1729
  } adam;
1605
1730
 
1606
1731
  // LBFGS parameters
@@ -1628,14 +1753,12 @@ extern "C" {
1628
1753
 
1629
1754
  bool just_initialized;
1630
1755
 
1756
+ float loss_before;
1757
+ float loss_after;
1758
+
1631
1759
  struct {
1632
- struct ggml_tensor * x; // view of the parameters
1633
- struct ggml_tensor * g1; // gradient
1634
- struct ggml_tensor * g2; // gradient squared
1635
1760
  struct ggml_tensor * m; // first moment
1636
1761
  struct ggml_tensor * v; // second moment
1637
- struct ggml_tensor * mh; // first moment hat
1638
- struct ggml_tensor * vh; // second moment hat
1639
1762
  struct ggml_tensor * pf; // past function values
1640
1763
  float fx_best;
1641
1764
  float fx_prev;
@@ -1672,10 +1795,10 @@ extern "C" {
1672
1795
 
1673
1796
  // initialize optimizer context
1674
1797
  GGML_API void ggml_opt_init(
1675
- struct ggml_context * ctx,
1798
+ struct ggml_context * ctx,
1676
1799
  struct ggml_opt_context * opt,
1677
- struct ggml_opt_params params,
1678
- int64_t nx);
1800
+ struct ggml_opt_params params,
1801
+ int64_t nx);
1679
1802
 
1680
1803
  // continue optimizing the function defined by the tensor f
1681
1804
  GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1689,7 +1812,9 @@ extern "C" {
1689
1812
  struct ggml_opt_context * opt,
1690
1813
  struct ggml_tensor * f,
1691
1814
  struct ggml_cgraph * gf,
1692
- struct ggml_cgraph * gb);
1815
+ struct ggml_cgraph * gb,
1816
+ ggml_opt_callback callback,
1817
+ void * callback_data);
1693
1818
 
1694
1819
  //
1695
1820
  // quantization
@@ -1703,6 +1828,127 @@ extern "C" {
1703
1828
 
1704
1829
  GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
1705
1830
 
1831
+ //
1832
+ // gguf
1833
+ //
1834
+
1835
+ enum gguf_type {
1836
+ GGUF_TYPE_UINT8 = 0,
1837
+ GGUF_TYPE_INT8 = 1,
1838
+ GGUF_TYPE_UINT16 = 2,
1839
+ GGUF_TYPE_INT16 = 3,
1840
+ GGUF_TYPE_UINT32 = 4,
1841
+ GGUF_TYPE_INT32 = 5,
1842
+ GGUF_TYPE_FLOAT32 = 6,
1843
+ GGUF_TYPE_BOOL = 7,
1844
+ GGUF_TYPE_STRING = 8,
1845
+ GGUF_TYPE_ARRAY = 9,
1846
+ GGUF_TYPE_UINT64 = 10,
1847
+ GGUF_TYPE_INT64 = 11,
1848
+ GGUF_TYPE_FLOAT64 = 12,
1849
+ GGUF_TYPE_COUNT, // marks the end of the enum
1850
+ };
1851
+
1852
+ struct gguf_context;
1853
+
1854
+ struct gguf_init_params {
1855
+ bool no_alloc;
1856
+
1857
+ // if not NULL, create a ggml_context and allocate the tensor data in it
1858
+ struct ggml_context ** ctx;
1859
+ };
1860
+
1861
+ GGML_API struct gguf_context * gguf_init_empty(void);
1862
+ GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
1863
+ //GGML_API struct gguf_context * gguf_init_from_buffer(..);
1864
+
1865
+ GGML_API void gguf_free(struct gguf_context * ctx);
1866
+
1867
+ GGML_API const char * gguf_type_name(enum gguf_type type);
1868
+
1869
+ GGML_API int gguf_get_version (struct gguf_context * ctx);
1870
+ GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
1871
+ GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
1872
+ GGML_API void * gguf_get_data (struct gguf_context * ctx);
1873
+
1874
+ GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
1875
+ GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
1876
+ GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
1877
+
1878
+ GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
1879
+ GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
1880
+
1881
+ // results are undefined if the wrong type is used for the key
1882
+ GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
1883
+ GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
1884
+ GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
1885
+ GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
1886
+ GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
1887
+ GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
1888
+ GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
1889
+ GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
1890
+ GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
1891
+ GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
1892
+ GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
1893
+ GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
1894
+ GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
1895
+ GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
1896
+ GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
1897
+
1898
+ GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
1899
+ GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
1900
+ GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
1901
+ GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
1902
+
1903
+ // overrides existing values or adds a new one
1904
+ GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
1905
+ GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
1906
+ GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
1907
+ GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
1908
+ GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
1909
+ GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
1910
+ GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
1911
+ GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
1912
+ GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
1913
+ GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
1914
+ GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
1915
+ GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
1916
+ GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
1917
+ GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
1918
+
1919
+ // set or add KV pairs from another context
1920
+ GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
1921
+
1922
+ // manage tensor info
1923
+ GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
1924
+ GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
1925
+ GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
1926
+
1927
+ // writing gguf files can be done in 2 ways:
1928
+ //
1929
+ // - write the entire gguf_context to a binary file in a single pass:
1930
+ //
1931
+ // gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
1932
+ //
1933
+ // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
1934
+ //
1935
+ // FILE * f = fopen(fname, "wb");
1936
+ // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
1937
+ // fwrite(..., f); // write the tensor data
1938
+ // void * data = malloc(gguf_get_meta_size(ctx)); gguf_get_meta_data(ctx, data);
1939
+ // fseek(f, 0, SEEK_SET);
1940
+ // fwrite(data, 1, gguf_get_meta_size(ctx), f);
1941
+ // free(data);
1942
+ // fclose(f);
1943
+ //
1944
+
1945
+ // write the entire context to a binary file
1946
+ GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
1947
+
1948
+ // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
1949
+ GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
1950
+ GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
1951
+
1706
1952
  //
1707
1953
  // system info
1708
1954
  //
@@ -1723,6 +1969,7 @@ extern "C" {
1723
1969
  GGML_API int ggml_cpu_has_clblast (void);
1724
1970
  GGML_API int ggml_cpu_has_gpublas (void);
1725
1971
  GGML_API int ggml_cpu_has_sse3 (void);
1972
+ GGML_API int ggml_cpu_has_ssse3 (void);
1726
1973
  GGML_API int ggml_cpu_has_vsx (void);
1727
1974
 
1728
1975
  //
@@ -1740,6 +1987,10 @@ extern "C" {
1740
1987
  typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
1741
1988
 
1742
1989
  typedef struct {
1990
+ const char * type_name;
1991
+ int blck_size;
1992
+ size_t type_size;
1993
+ bool is_quantized;
1743
1994
  ggml_to_float_t to_float;
1744
1995
  ggml_from_float_t from_float;
1745
1996
  ggml_from_float_t from_float_reference;
@@ -1747,7 +1998,7 @@ extern "C" {
1747
1998
  enum ggml_type vec_dot_type;
1748
1999
  } ggml_type_traits_t;
1749
2000
 
1750
- ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type i);
2001
+ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
1751
2002
 
1752
2003
  #ifdef __cplusplus
1753
2004
  }