llama_cpp 0.3.8 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -130,13 +130,16 @@
130
130
  // The data of the tensor is accessed via the "data" pointer. For example:
131
131
  //
132
132
  // {
133
- // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3);
133
+ // const int nx = 2;
134
+ // const int ny = 3;
134
135
  //
135
- // // a[2, 1] = 1.0f;
136
- // *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
136
+ // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
137
137
  //
138
- // // a[0, 2] = 2.0f;
139
- // *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f;
138
+ // for (int y = 0; y < ny; y++) {
139
+ // for (int x = 0; x < nx; x++) {
140
+ // *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
141
+ // }
142
+ // }
140
143
  //
141
144
  // ...
142
145
  // }
@@ -207,14 +210,24 @@
207
210
  #define GGML_MAX_PARAMS 256
208
211
  #define GGML_MAX_CONTEXTS 64
209
212
  #define GGML_MAX_SRC 6
210
- #define GGML_MAX_NAME 48
213
+ #define GGML_MAX_NAME 64
211
214
  #define GGML_MAX_OP_PARAMS 32
212
215
  #define GGML_DEFAULT_N_THREADS 4
213
216
 
217
+ #if UINTPTR_MAX == 0xFFFFFFFF
218
+ #define GGML_MEM_ALIGN 4
219
+ #else
220
+ #define GGML_MEM_ALIGN 16
221
+ #endif
214
222
 
215
223
  #define GGML_EXIT_SUCCESS 0
216
224
  #define GGML_EXIT_ABORTED 1
217
225
 
226
+ #define GGUF_MAGIC 0x46554747 // "GGUF"
227
+ #define GGUF_VERSION 2
228
+
229
+ #define GGUF_DEFAULT_ALIGNMENT 32
230
+
218
231
  #define GGML_UNUSED(x) (void)(x)
219
232
 
220
233
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -255,8 +268,9 @@
255
268
  extern "C" {
256
269
  #endif
257
270
 
258
- #ifdef __ARM_NEON
259
- // we use the built-in 16-bit float type
271
+ #if defined(__ARM_NEON) && defined(__CUDACC__)
272
+ typedef half ggml_fp16_t;
273
+ #elif defined(__ARM_NEON)
260
274
  typedef __fp16 ggml_fp16_t;
261
275
  #else
262
276
  typedef uint16_t ggml_fp16_t;
@@ -340,10 +354,12 @@ extern "C" {
340
354
  GGML_OP_ARGMAX,
341
355
  GGML_OP_REPEAT,
342
356
  GGML_OP_REPEAT_BACK,
357
+ GGML_OP_CONCAT,
343
358
  GGML_OP_SILU_BACK,
344
359
  GGML_OP_NORM, // normalize
345
360
  GGML_OP_RMS_NORM,
346
361
  GGML_OP_RMS_NORM_BACK,
362
+ GGML_OP_GROUP_NORM,
347
363
 
348
364
  GGML_OP_MUL_MAT,
349
365
  GGML_OP_OUT_PROD,
@@ -369,14 +385,19 @@ extern "C" {
369
385
  GGML_OP_CLAMP,
370
386
  GGML_OP_CONV_1D,
371
387
  GGML_OP_CONV_2D,
388
+ GGML_OP_CONV_TRANSPOSE_2D,
372
389
  GGML_OP_POOL_1D,
373
390
  GGML_OP_POOL_2D,
374
391
 
392
+ GGML_OP_UPSCALE, // nearest interpolate
393
+
375
394
  GGML_OP_FLASH_ATTN,
376
395
  GGML_OP_FLASH_FF,
377
396
  GGML_OP_FLASH_ATTN_BACK,
378
397
  GGML_OP_WIN_PART,
379
398
  GGML_OP_WIN_UNPART,
399
+ GGML_OP_GET_REL_POS,
400
+ GGML_OP_ADD_REL_POS,
380
401
 
381
402
  GGML_OP_UNARY,
382
403
 
@@ -458,6 +479,9 @@ extern "C" {
458
479
  int64_t perf_cycles;
459
480
  int64_t perf_time_us;
460
481
 
482
+ struct ggml_tensor * view_src;
483
+ size_t view_offs;
484
+
461
485
  void * data;
462
486
 
463
487
  char name[GGML_MAX_NAME];
@@ -562,6 +586,7 @@ extern "C" {
562
586
  GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
563
587
  GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
564
588
  GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
589
+ GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
565
590
  GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
566
591
 
567
592
  GGML_API int ggml_blck_size (enum ggml_type type);
@@ -639,7 +664,7 @@ extern "C" {
639
664
  GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
640
665
 
641
666
  GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
642
- GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
667
+ GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
643
668
 
644
669
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
645
670
 
@@ -799,6 +824,13 @@ extern "C" {
799
824
  struct ggml_tensor * a,
800
825
  struct ggml_tensor * b);
801
826
 
827
+ // concat a and b on dim 2
828
+ // used in stable-diffusion
829
+ GGML_API struct ggml_tensor * ggml_concat(
830
+ struct ggml_context * ctx,
831
+ struct ggml_tensor * a,
832
+ struct ggml_tensor * b);
833
+
802
834
  GGML_API struct ggml_tensor * ggml_abs(
803
835
  struct ggml_context * ctx,
804
836
  struct ggml_tensor * a);
@@ -888,14 +920,15 @@ extern "C" {
888
920
  struct ggml_tensor * b);
889
921
 
890
922
  // normalize along rows
891
- // TODO: eps is hardcoded to 1e-5 for now
892
923
  GGML_API struct ggml_tensor * ggml_norm(
893
924
  struct ggml_context * ctx,
894
- struct ggml_tensor * a);
925
+ struct ggml_tensor * a,
926
+ float eps);
895
927
 
896
928
  GGML_API struct ggml_tensor * ggml_norm_inplace(
897
929
  struct ggml_context * ctx,
898
- struct ggml_tensor * a);
930
+ struct ggml_tensor * a,
931
+ float eps);
899
932
 
900
933
  GGML_API struct ggml_tensor * ggml_rms_norm(
901
934
  struct ggml_context * ctx,
@@ -907,13 +940,26 @@ extern "C" {
907
940
  struct ggml_tensor * a,
908
941
  float eps);
909
942
 
943
+ // group normalize along ne0*ne1*n_groups
944
+ // used in stable-diffusion
945
+ // TODO: eps is hardcoded to 1e-6 for now
946
+ GGML_API struct ggml_tensor * ggml_group_norm(
947
+ struct ggml_context * ctx,
948
+ struct ggml_tensor * a,
949
+ int n_groups);
950
+
951
+ GGML_API struct ggml_tensor * ggml_group_norm_inplace(
952
+ struct ggml_context * ctx,
953
+ struct ggml_tensor * a,
954
+ int n_groups);
955
+
910
956
  // a - x
911
957
  // b - dy
912
- // TODO: update with configurable eps
913
958
  GGML_API struct ggml_tensor * ggml_rms_norm_back(
914
959
  struct ggml_context * ctx,
915
960
  struct ggml_tensor * a,
916
- struct ggml_tensor * b);
961
+ struct ggml_tensor * b,
962
+ float eps);
917
963
 
918
964
  // A: n columns, m rows
919
965
  // B: n columns, p rows (i.e. we transpose it internally)
@@ -1207,6 +1253,15 @@ extern "C" {
1207
1253
  float freq_base,
1208
1254
  float freq_scale);
1209
1255
 
1256
+ // xPos RoPE, in-place, returns view(a)
1257
+ GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
1258
+ struct ggml_context * ctx,
1259
+ struct ggml_tensor * a,
1260
+ int n_past,
1261
+ int n_dims,
1262
+ float base,
1263
+ bool down);
1264
+
1210
1265
  // rotary position embedding backward, i.e. compute dx from dy
1211
1266
  // a - dy
1212
1267
  GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1215,7 +1270,11 @@ extern "C" {
1215
1270
  int n_past,
1216
1271
  int n_dims,
1217
1272
  int mode,
1218
- int n_ctx);
1273
+ int n_ctx,
1274
+ float freq_base,
1275
+ float freq_scale,
1276
+ float xpos_base,
1277
+ bool xpos_down);
1219
1278
 
1220
1279
  // alibi position embedding
1221
1280
  // in-place, returns view(a)
@@ -1242,6 +1301,15 @@ extern "C" {
1242
1301
  int p0, // padding
1243
1302
  int d0); // dilation
1244
1303
 
1304
+ // conv_1d with padding = half
1305
+ // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1306
+ GGML_API struct ggml_tensor* ggml_conv_1d_ph(
1307
+ struct ggml_context * ctx,
1308
+ struct ggml_tensor * a,
1309
+ struct ggml_tensor * b,
1310
+ int s,
1311
+ int d);
1312
+
1245
1313
  GGML_API struct ggml_tensor * ggml_conv_2d(
1246
1314
  struct ggml_context * ctx,
1247
1315
  struct ggml_tensor * a,
@@ -1253,14 +1321,38 @@ extern "C" {
1253
1321
  int d0,
1254
1322
  int d1);
1255
1323
 
1256
- // conv_1d with padding = half
1257
- // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1258
- GGML_API struct ggml_tensor * ggml_conv_1d_ph(
1324
+
1325
+ // kernel size is a->ne[0] x a->ne[1]
1326
+ // stride is equal to kernel size
1327
+ // padding is zero
1328
+ // example:
1329
+ // a: 16 16 3 768
1330
+ // b: 1024 1024 3 1
1331
+ // res: 64 64 768 1
1332
+ // used in sam
1333
+ GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
1334
+ struct ggml_context * ctx,
1335
+ struct ggml_tensor * a,
1336
+ struct ggml_tensor * b);
1337
+
1338
+ // kernel size is a->ne[0] x a->ne[1]
1339
+ // stride is 1
1340
+ // padding is half
1341
+ // example:
1342
+ // a: 3 3 256 256
1343
+ // b: 64 64 256 1
1344
+ // res: 64 64 256 1
1345
+ // used in sam
1346
+ GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
1347
+ struct ggml_context * ctx,
1348
+ struct ggml_tensor * a,
1349
+ struct ggml_tensor * b);
1350
+
1351
+ GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
1259
1352
  struct ggml_context * ctx,
1260
1353
  struct ggml_tensor * a,
1261
1354
  struct ggml_tensor * b,
1262
- int s,
1263
- int d);
1355
+ int stride);
1264
1356
 
1265
1357
  enum ggml_op_pool {
1266
1358
  GGML_OP_POOL_MAX,
@@ -1287,6 +1379,13 @@ extern "C" {
1287
1379
  int p0,
1288
1380
  int p1);
1289
1381
 
1382
+ // nearest interpolate
1383
+ // used in stable-diffusion
1384
+ GGML_API struct ggml_tensor * ggml_upscale(
1385
+ struct ggml_context * ctx,
1386
+ struct ggml_tensor * a,
1387
+ int scale_factor);
1388
+
1290
1389
  GGML_API struct ggml_tensor * ggml_flash_attn(
1291
1390
  struct ggml_context * ctx,
1292
1391
  struct ggml_tensor * q,
@@ -1340,6 +1439,27 @@ extern "C" {
1340
1439
  struct ggml_tensor * a,
1341
1440
  enum ggml_unary_op op);
1342
1441
 
1442
+ // used in sam
1443
+ GGML_API struct ggml_tensor * ggml_get_rel_pos(
1444
+ struct ggml_context * ctx,
1445
+ struct ggml_tensor * a,
1446
+ int qh,
1447
+ int kh);
1448
+
1449
+ // used in sam
1450
+
1451
+ GGML_API struct ggml_tensor * ggml_add_rel_pos(
1452
+ struct ggml_context * ctx,
1453
+ struct ggml_tensor * a,
1454
+ struct ggml_tensor * pw,
1455
+ struct ggml_tensor * ph);
1456
+
1457
+ GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
1458
+ struct ggml_context * ctx,
1459
+ struct ggml_tensor * a,
1460
+ struct ggml_tensor * pw,
1461
+ struct ggml_tensor * ph);
1462
+
1343
1463
  // custom operators
1344
1464
 
1345
1465
  typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1495,7 +1615,8 @@ extern "C" {
1495
1615
  struct ggml_tensor * tensor);
1496
1616
 
1497
1617
 
1498
- GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1618
+ GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1619
+ GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
1499
1620
 
1500
1621
  GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
1501
1622
  GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
@@ -1560,6 +1681,8 @@ extern "C" {
1560
1681
  GGML_LINESEARCH_INVALID_PARAMETERS,
1561
1682
  };
1562
1683
 
1684
+ typedef void (*ggml_opt_callback)(void * data, float * sched);
1685
+
1563
1686
  // optimization parameters
1564
1687
  //
1565
1688
  // see ggml.c (ggml_opt_default_params) for default values
@@ -1595,12 +1718,14 @@ extern "C" {
1595
1718
 
1596
1719
  float sched; // schedule multiplier (fixed, decay or warmup)
1597
1720
  float decay; // weight decay for AdamW, use 0.0f to disable
1721
+ int decay_min_ndim; // minimum number of tensor dimensions to apply weight decay
1598
1722
  float alpha; // learning rate
1599
1723
  float beta1;
1600
1724
  float beta2;
1601
1725
  float eps; // epsilon for numerical stability
1602
1726
  float eps_f; // epsilon for convergence test
1603
1727
  float eps_g; // epsilon for convergence test
1728
+ float gclip; // gradient clipping
1604
1729
  } adam;
1605
1730
 
1606
1731
  // LBFGS parameters
@@ -1628,14 +1753,12 @@ extern "C" {
1628
1753
 
1629
1754
  bool just_initialized;
1630
1755
 
1756
+ float loss_before;
1757
+ float loss_after;
1758
+
1631
1759
  struct {
1632
- struct ggml_tensor * x; // view of the parameters
1633
- struct ggml_tensor * g1; // gradient
1634
- struct ggml_tensor * g2; // gradient squared
1635
1760
  struct ggml_tensor * m; // first moment
1636
1761
  struct ggml_tensor * v; // second moment
1637
- struct ggml_tensor * mh; // first moment hat
1638
- struct ggml_tensor * vh; // second moment hat
1639
1762
  struct ggml_tensor * pf; // past function values
1640
1763
  float fx_best;
1641
1764
  float fx_prev;
@@ -1672,10 +1795,10 @@ extern "C" {
1672
1795
 
1673
1796
  // initialize optimizer context
1674
1797
  GGML_API void ggml_opt_init(
1675
- struct ggml_context * ctx,
1798
+ struct ggml_context * ctx,
1676
1799
  struct ggml_opt_context * opt,
1677
- struct ggml_opt_params params,
1678
- int64_t nx);
1800
+ struct ggml_opt_params params,
1801
+ int64_t nx);
1679
1802
 
1680
1803
  // continue optimizing the function defined by the tensor f
1681
1804
  GGML_API enum ggml_opt_result ggml_opt_resume(
@@ -1689,7 +1812,9 @@ extern "C" {
1689
1812
  struct ggml_opt_context * opt,
1690
1813
  struct ggml_tensor * f,
1691
1814
  struct ggml_cgraph * gf,
1692
- struct ggml_cgraph * gb);
1815
+ struct ggml_cgraph * gb,
1816
+ ggml_opt_callback callback,
1817
+ void * callback_data);
1693
1818
 
1694
1819
  //
1695
1820
  // quantization
@@ -1703,6 +1828,127 @@ extern "C" {
1703
1828
 
1704
1829
  GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
1705
1830
 
1831
+ //
1832
+ // gguf
1833
+ //
1834
+
1835
+ enum gguf_type {
1836
+ GGUF_TYPE_UINT8 = 0,
1837
+ GGUF_TYPE_INT8 = 1,
1838
+ GGUF_TYPE_UINT16 = 2,
1839
+ GGUF_TYPE_INT16 = 3,
1840
+ GGUF_TYPE_UINT32 = 4,
1841
+ GGUF_TYPE_INT32 = 5,
1842
+ GGUF_TYPE_FLOAT32 = 6,
1843
+ GGUF_TYPE_BOOL = 7,
1844
+ GGUF_TYPE_STRING = 8,
1845
+ GGUF_TYPE_ARRAY = 9,
1846
+ GGUF_TYPE_UINT64 = 10,
1847
+ GGUF_TYPE_INT64 = 11,
1848
+ GGUF_TYPE_FLOAT64 = 12,
1849
+ GGUF_TYPE_COUNT, // marks the end of the enum
1850
+ };
1851
+
1852
+ struct gguf_context;
1853
+
1854
+ struct gguf_init_params {
1855
+ bool no_alloc;
1856
+
1857
+ // if not NULL, create a ggml_context and allocate the tensor data in it
1858
+ struct ggml_context ** ctx;
1859
+ };
1860
+
1861
+ GGML_API struct gguf_context * gguf_init_empty(void);
1862
+ GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
1863
+ //GGML_API struct gguf_context * gguf_init_from_buffer(..);
1864
+
1865
+ GGML_API void gguf_free(struct gguf_context * ctx);
1866
+
1867
+ GGML_API const char * gguf_type_name(enum gguf_type type);
1868
+
1869
+ GGML_API int gguf_get_version (struct gguf_context * ctx);
1870
+ GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
1871
+ GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
1872
+ GGML_API void * gguf_get_data (struct gguf_context * ctx);
1873
+
1874
+ GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
1875
+ GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
1876
+ GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
1877
+
1878
+ GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
1879
+ GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
1880
+
1881
+ // results are undefined if the wrong type is used for the key
1882
+ GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
1883
+ GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
1884
+ GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
1885
+ GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
1886
+ GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
1887
+ GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
1888
+ GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
1889
+ GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
1890
+ GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
1891
+ GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
1892
+ GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
1893
+ GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
1894
+ GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
1895
+ GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
1896
+ GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
1897
+
1898
+ GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
1899
+ GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
1900
+ GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
1901
+ GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
1902
+
1903
+ // overrides existing values or adds a new one
1904
+ GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
1905
+ GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
1906
+ GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
1907
+ GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
1908
+ GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
1909
+ GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
1910
+ GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
1911
+ GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
1912
+ GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
1913
+ GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
1914
+ GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
1915
+ GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
1916
+ GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
1917
+ GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
1918
+
1919
+ // set or add KV pairs from another context
1920
+ GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
1921
+
1922
+ // manage tensor info
1923
+ GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
1924
+ GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
1925
+ GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
1926
+
1927
+ // writing gguf files can be done in 2 ways:
1928
+ //
1929
+ // - write the entire gguf_context to a binary file in a single pass:
1930
+ //
1931
+ // gguf_write_to_file(ctx, fname, /*only_meta =*/ false);
1932
+ //
1933
+ // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
1934
+ //
1935
+ // FILE * f = fopen(fname, "wb");
1936
+ // fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
1937
+ // fwrite(..., f);
1938
+ // void * data = malloc(gguf_get_meta_size(ctx)); gguf_get_meta_data(ctx, data);
1939
+ // fseek(f, 0, SEEK_SET);
1940
+ // fwrite(data, 1, gguf_get_meta_size(ctx), f);
1941
+ // free(data);
1942
+ // fclose(f);
1943
+ //
1944
+
1945
+ // write the entire context to a binary file
1946
+ GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
1947
+
1948
+ // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
1949
+ GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
1950
+ GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
1951
+
1706
1952
  //
1707
1953
  // system info
1708
1954
  //
@@ -1723,6 +1969,7 @@ extern "C" {
1723
1969
  GGML_API int ggml_cpu_has_clblast (void);
1724
1970
  GGML_API int ggml_cpu_has_gpublas (void);
1725
1971
  GGML_API int ggml_cpu_has_sse3 (void);
1972
+ GGML_API int ggml_cpu_has_ssse3 (void);
1726
1973
  GGML_API int ggml_cpu_has_vsx (void);
1727
1974
 
1728
1975
  //
@@ -1740,6 +1987,10 @@ extern "C" {
1740
1987
  typedef void (*ggml_vec_dot_t) (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
1741
1988
 
1742
1989
  typedef struct {
1990
+ const char * type_name;
1991
+ int blck_size;
1992
+ size_t type_size;
1993
+ bool is_quantized;
1743
1994
  ggml_to_float_t to_float;
1744
1995
  ggml_from_float_t from_float;
1745
1996
  ggml_from_float_t from_float_reference;
@@ -1747,7 +1998,7 @@ extern "C" {
1747
1998
  enum ggml_type vec_dot_type;
1748
1999
  } ggml_type_traits_t;
1749
2000
 
1750
- ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type i);
2001
+ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
1751
2002
 
1752
2003
  #ifdef __cplusplus
1753
2004
  }