whisper.rn 0.4.0-rc.10 → 0.4.0-rc.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/android/src/main/CMakeLists.txt +9 -3
  2. package/cpp/amx/amx.cpp +220 -0
  3. package/cpp/amx/amx.h +8 -0
  4. package/cpp/amx/common.h +91 -0
  5. package/cpp/amx/mmq.cpp +2511 -0
  6. package/cpp/amx/mmq.h +10 -0
  7. package/cpp/ggml-alloc.c +6 -14
  8. package/cpp/ggml-backend-impl.h +50 -11
  9. package/cpp/ggml-backend-reg.cpp +409 -31
  10. package/cpp/ggml-backend.cpp +9 -3
  11. package/cpp/ggml-backend.h +18 -0
  12. package/cpp/ggml-common.h +41 -43
  13. package/cpp/ggml-cpp.h +1 -0
  14. package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
  15. package/cpp/ggml-cpu-aarch64.h +2 -24
  16. package/cpp/ggml-cpu-impl.h +171 -11
  17. package/cpp/ggml-cpu-quants.c +1812 -389
  18. package/cpp/ggml-cpu-traits.cpp +36 -0
  19. package/cpp/ggml-cpu-traits.h +38 -0
  20. package/cpp/ggml-cpu.c +1432 -610
  21. package/cpp/ggml-cpu.cpp +131 -141
  22. package/cpp/ggml-cpu.h +10 -50
  23. package/cpp/ggml-impl.h +27 -11
  24. package/cpp/ggml-metal-impl.h +39 -0
  25. package/cpp/ggml-metal.h +1 -1
  26. package/cpp/ggml-metal.m +1031 -359
  27. package/cpp/ggml-opt.cpp +854 -0
  28. package/cpp/ggml-opt.h +216 -0
  29. package/cpp/ggml-quants.c +0 -9
  30. package/cpp/ggml-threading.h +4 -2
  31. package/cpp/ggml-whisper.metallib +0 -0
  32. package/cpp/ggml.c +501 -1537
  33. package/cpp/ggml.h +144 -171
  34. package/cpp/gguf.cpp +1329 -0
  35. package/cpp/gguf.h +202 -0
  36. package/cpp/whisper.cpp +254 -114
  37. package/cpp/whisper.h +6 -3
  38. package/lib/commonjs/version.json +1 -1
  39. package/lib/module/version.json +1 -1
  40. package/package.json +2 -1
  41. package/src/version.json +1 -1
  42. package/whisper-rn.podspec +2 -2
  43. package/cpp/README.md +0 -4
  44. package/cpp/ggml-aarch64.c +0 -129
  45. package/cpp/ggml-aarch64.h +0 -19
  46. package/cpp/ggml-backend.cpp.rej +0 -12
package/cpp/ggml.h CHANGED
@@ -198,7 +198,7 @@
198
198
 
199
199
  #ifndef __GNUC__
200
200
  # define WSP_GGML_ATTRIBUTE_FORMAT(...)
201
- #elif defined(__MINGW32__)
201
+ #elif defined(__MINGW32__) && !defined(__clang__)
202
202
  # define WSP_GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
203
203
  #else
204
204
  # define WSP_GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@@ -237,13 +237,9 @@
237
237
  #define WSP_GGML_EXIT_SUCCESS 0
238
238
  #define WSP_GGML_EXIT_ABORTED 1
239
239
 
240
- #define WSP_GGML_ROPE_TYPE_NEOX 2
241
-
242
- #define WSP_GGUF_MAGIC "GGUF"
243
-
244
- #define WSP_GGUF_VERSION 3
245
-
246
- #define WSP_GGUF_DEFAULT_ALIGNMENT 32
240
+ #define WSP_GGML_ROPE_TYPE_NEOX 2
241
+ #define WSP_GGML_ROPE_TYPE_MROPE 8
242
+ #define WSP_GGML_ROPE_TYPE_VISION 24
247
243
 
248
244
  #define WSP_GGML_UNUSED(x) (void)(x)
249
245
 
@@ -384,12 +380,15 @@ extern "C" {
384
380
  WSP_GGML_TYPE_F64 = 28,
385
381
  WSP_GGML_TYPE_IQ1_M = 29,
386
382
  WSP_GGML_TYPE_BF16 = 30,
387
- WSP_GGML_TYPE_Q4_0_4_4 = 31,
388
- WSP_GGML_TYPE_Q4_0_4_8 = 32,
389
- WSP_GGML_TYPE_Q4_0_8_8 = 33,
383
+ // WSP_GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
384
+ // WSP_GGML_TYPE_Q4_0_4_8 = 32,
385
+ // WSP_GGML_TYPE_Q4_0_8_8 = 33,
390
386
  WSP_GGML_TYPE_TQ1_0 = 34,
391
387
  WSP_GGML_TYPE_TQ2_0 = 35,
392
- WSP_GGML_TYPE_COUNT,
388
+ // WSP_GGML_TYPE_IQ4_NL_4_4 = 36,
389
+ // WSP_GGML_TYPE_IQ4_NL_4_8 = 37,
390
+ // WSP_GGML_TYPE_IQ4_NL_8_8 = 38,
391
+ WSP_GGML_TYPE_COUNT = 39,
393
392
  };
394
393
 
395
394
  // precision
@@ -398,12 +397,6 @@ extern "C" {
398
397
  WSP_GGML_PREC_F32,
399
398
  };
400
399
 
401
- enum wsp_ggml_backend_type {
402
- WSP_GGML_BACKEND_TYPE_CPU = 0,
403
- WSP_GGML_BACKEND_TYPE_GPU = 10,
404
- WSP_GGML_BACKEND_TYPE_GPU_SPLIT = 20,
405
- };
406
-
407
400
  // model file types
408
401
  enum wsp_ggml_ftype {
409
402
  WSP_GGML_FTYPE_UNKNOWN = -1,
@@ -430,9 +423,6 @@ extern "C" {
430
423
  WSP_GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
431
424
  WSP_GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
432
425
  WSP_GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
433
- WSP_GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
434
- WSP_GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
435
- WSP_GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
436
426
  };
437
427
 
438
428
  // available tensor operations:
@@ -496,6 +486,7 @@ extern "C" {
496
486
  WSP_GGML_OP_POOL_2D_BACK,
497
487
  WSP_GGML_OP_UPSCALE, // nearest interpolate
498
488
  WSP_GGML_OP_PAD,
489
+ WSP_GGML_OP_PAD_REFLECT_1D,
499
490
  WSP_GGML_OP_ARANGE,
500
491
  WSP_GGML_OP_TIMESTEP_EMBEDDING,
501
492
  WSP_GGML_OP_ARGSORT,
@@ -510,6 +501,7 @@ extern "C" {
510
501
  WSP_GGML_OP_GET_REL_POS,
511
502
  WSP_GGML_OP_ADD_REL_POS,
512
503
  WSP_GGML_OP_RWKV_WKV6,
504
+ WSP_GGML_OP_GATED_LINEAR_ATTN,
513
505
 
514
506
  WSP_GGML_OP_UNARY,
515
507
 
@@ -584,8 +576,6 @@ extern "C" {
584
576
  struct wsp_ggml_tensor {
585
577
  enum wsp_ggml_type type;
586
578
 
587
- WSP_GGML_DEPRECATED(enum wsp_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
588
-
589
579
  struct wsp_ggml_backend_buffer * buffer;
590
580
 
591
581
  int64_t ne[WSP_GGML_MAX_DIMS]; // number of elements
@@ -1394,16 +1384,20 @@ extern "C" {
1394
1384
  float scale,
1395
1385
  float max_bias);
1396
1386
 
1397
- WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back(
1387
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_back(
1398
1388
  struct wsp_ggml_context * ctx,
1399
1389
  struct wsp_ggml_tensor * a,
1400
- struct wsp_ggml_tensor * b);
1390
+ struct wsp_ggml_tensor * b,
1391
+ float scale,
1392
+ float max_bias);
1401
1393
 
1402
1394
  // in-place, returns view(a)
1403
- WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back_inplace(
1395
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_back_inplace(
1404
1396
  struct wsp_ggml_context * ctx,
1405
1397
  struct wsp_ggml_tensor * a,
1406
- struct wsp_ggml_tensor * b);
1398
+ struct wsp_ggml_tensor * b,
1399
+ float scale,
1400
+ float max_bias);
1407
1401
 
1408
1402
  // rotary position embedding
1409
1403
  // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
@@ -1442,6 +1436,22 @@ extern "C" {
1442
1436
  float beta_fast,
1443
1437
  float beta_slow);
1444
1438
 
1439
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_multi(
1440
+ struct wsp_ggml_context * ctx,
1441
+ struct wsp_ggml_tensor * a,
1442
+ struct wsp_ggml_tensor * b,
1443
+ struct wsp_ggml_tensor * c,
1444
+ int n_dims,
1445
+ int sections[4],
1446
+ int mode,
1447
+ int n_ctx_orig,
1448
+ float freq_base,
1449
+ float freq_scale,
1450
+ float ext_factor,
1451
+ float attn_factor,
1452
+ float beta_fast,
1453
+ float beta_slow);
1454
+
1445
1455
  // in-place, returns view(a)
1446
1456
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_ext_inplace(
1447
1457
  struct wsp_ggml_context * ctx,
@@ -1494,7 +1504,7 @@ extern "C" {
1494
1504
 
1495
1505
  // rotary position embedding backward, i.e. compute dx from dy
1496
1506
  // a - dy
1497
- WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_back(
1507
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_ext_back(
1498
1508
  struct wsp_ggml_context * ctx,
1499
1509
  struct wsp_ggml_tensor * a, // gradients of wsp_ggml_rope result
1500
1510
  struct wsp_ggml_tensor * b, // positions
@@ -1509,6 +1519,23 @@ extern "C" {
1509
1519
  float beta_fast,
1510
1520
  float beta_slow);
1511
1521
 
1522
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_multi_back(
1523
+ struct wsp_ggml_context * ctx,
1524
+ struct wsp_ggml_tensor * a,
1525
+ struct wsp_ggml_tensor * b,
1526
+ struct wsp_ggml_tensor * c,
1527
+ int n_dims,
1528
+ int sections[4],
1529
+ int mode,
1530
+ int n_ctx_orig,
1531
+ float freq_base,
1532
+ float freq_scale,
1533
+ float ext_factor,
1534
+ float attn_factor,
1535
+ float beta_fast,
1536
+ float beta_slow);
1537
+
1538
+
1512
1539
  // clamp
1513
1540
  // in-place, returns view(a)
1514
1541
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_clamp(
@@ -1545,17 +1572,6 @@ extern "C" {
1545
1572
  int d1, // dilation dimension 1
1546
1573
  bool is_2D);
1547
1574
 
1548
- WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_depthwise_2d(
1549
- struct wsp_ggml_context * ctx,
1550
- struct wsp_ggml_tensor * a, // convolution kernel
1551
- struct wsp_ggml_tensor * b, // data
1552
- int s0, // stride dimension 0
1553
- int s1, // stride dimension 1
1554
- int p0, // padding dimension 0
1555
- int p1, // padding dimension 1
1556
- int d0, // dilation dimension 0
1557
- int d1); // dilation dimension 1
1558
-
1559
1575
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d(
1560
1576
  struct wsp_ggml_context * ctx,
1561
1577
  struct wsp_ggml_tensor * a, // convolution kernel
@@ -1573,6 +1589,23 @@ extern "C" {
1573
1589
  int s, // stride
1574
1590
  int d); // dilation
1575
1591
 
1592
+ // depthwise
1593
+ // TODO: this is very likely wrong for some cases! - needs more testing
1594
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d_dw(
1595
+ struct wsp_ggml_context * ctx,
1596
+ struct wsp_ggml_tensor * a, // convolution kernel
1597
+ struct wsp_ggml_tensor * b, // data
1598
+ int s0, // stride
1599
+ int p0, // padding
1600
+ int d0); // dilation
1601
+
1602
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d_dw_ph(
1603
+ struct wsp_ggml_context * ctx,
1604
+ struct wsp_ggml_tensor * a, // convolution kernel
1605
+ struct wsp_ggml_tensor * b, // data
1606
+ int s0, // stride
1607
+ int d0); // dilation
1608
+
1576
1609
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_transpose_1d(
1577
1610
  struct wsp_ggml_context * ctx,
1578
1611
  struct wsp_ggml_tensor * a, // convolution kernel
@@ -1592,7 +1625,6 @@ extern "C" {
1592
1625
  int d0, // dilation dimension 0
1593
1626
  int d1); // dilation dimension 1
1594
1627
 
1595
-
1596
1628
  // kernel size is a->ne[0] x a->ne[1]
1597
1629
  // stride is equal to kernel size
1598
1630
  // padding is zero
@@ -1619,6 +1651,18 @@ extern "C" {
1619
1651
  struct wsp_ggml_tensor * a,
1620
1652
  struct wsp_ggml_tensor * b);
1621
1653
 
1654
+ // depthwise
1655
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_2d_dw(
1656
+ struct wsp_ggml_context * ctx,
1657
+ struct wsp_ggml_tensor * a, // convolution kernel
1658
+ struct wsp_ggml_tensor * b, // data
1659
+ int s0, // stride dimension 0
1660
+ int s1, // stride dimension 1
1661
+ int p0, // padding dimension 0
1662
+ int p1, // padding dimension 1
1663
+ int d0, // dilation dimension 0
1664
+ int d1); // dilation dimension 1
1665
+
1622
1666
  WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_transpose_2d_p0(
1623
1667
  struct wsp_ggml_context * ctx,
1624
1668
  struct wsp_ggml_tensor * a,
@@ -1692,6 +1736,13 @@ extern "C" {
1692
1736
  int p2,
1693
1737
  int p3);
1694
1738
 
1739
+ // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
1740
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
1741
+ struct wsp_ggml_context * ctx,
1742
+ struct wsp_ggml_tensor * a,
1743
+ int p0,
1744
+ int p1);
1745
+
1695
1746
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1696
1747
  // timesteps: [N,]
1697
1748
  // return: [N, dim]
@@ -1724,7 +1775,7 @@ extern "C" {
1724
1775
  struct wsp_ggml_tensor * a,
1725
1776
  int k);
1726
1777
 
1727
- #define WSP_GGML_KQ_MASK_PAD 32
1778
+ #define WSP_GGML_KQ_MASK_PAD 64
1728
1779
 
1729
1780
  // q: [n_embd, n_batch, n_head, 1]
1730
1781
  // k: [n_embd, n_kv, n_head_kv, 1]
@@ -1830,6 +1881,15 @@ extern "C" {
1830
1881
  struct wsp_ggml_tensor * td,
1831
1882
  struct wsp_ggml_tensor * state);
1832
1883
 
1884
+ WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gated_linear_attn(
1885
+ struct wsp_ggml_context * ctx,
1886
+ struct wsp_ggml_tensor * k,
1887
+ struct wsp_ggml_tensor * v,
1888
+ struct wsp_ggml_tensor * q,
1889
+ struct wsp_ggml_tensor * g,
1890
+ struct wsp_ggml_tensor * state,
1891
+ float scale);
1892
+
1833
1893
  // custom operators
1834
1894
 
1835
1895
  typedef void (*wsp_ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -2068,137 +2128,19 @@ extern "C" {
2068
2128
  int64_t n_per_row,
2069
2129
  const float * imatrix);
2070
2130
 
2071
- //
2072
- // gguf
2073
- //
2074
-
2075
- enum wsp_gguf_type {
2076
- WSP_GGUF_TYPE_UINT8 = 0,
2077
- WSP_GGUF_TYPE_INT8 = 1,
2078
- WSP_GGUF_TYPE_UINT16 = 2,
2079
- WSP_GGUF_TYPE_INT16 = 3,
2080
- WSP_GGUF_TYPE_UINT32 = 4,
2081
- WSP_GGUF_TYPE_INT32 = 5,
2082
- WSP_GGUF_TYPE_FLOAT32 = 6,
2083
- WSP_GGUF_TYPE_BOOL = 7,
2084
- WSP_GGUF_TYPE_STRING = 8,
2085
- WSP_GGUF_TYPE_ARRAY = 9,
2086
- WSP_GGUF_TYPE_UINT64 = 10,
2087
- WSP_GGUF_TYPE_INT64 = 11,
2088
- WSP_GGUF_TYPE_FLOAT64 = 12,
2089
- WSP_GGUF_TYPE_COUNT, // marks the end of the enum
2090
- };
2091
-
2092
- struct wsp_gguf_context;
2093
-
2094
- struct wsp_gguf_init_params {
2095
- bool no_alloc;
2096
-
2097
- // if not NULL, create a wsp_ggml_context and allocate the tensor data in it
2098
- struct wsp_ggml_context ** ctx;
2099
- };
2100
-
2101
- WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_empty(void);
2102
- WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file(const char * fname, struct wsp_gguf_init_params params);
2103
- //WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_buffer(..);
2104
-
2105
- WSP_GGML_API void wsp_gguf_free(struct wsp_gguf_context * ctx);
2106
-
2107
- WSP_GGML_API const char * wsp_gguf_type_name(enum wsp_gguf_type type);
2108
-
2109
- WSP_GGML_API int wsp_gguf_get_version (const struct wsp_gguf_context * ctx);
2110
- WSP_GGML_API size_t wsp_gguf_get_alignment (const struct wsp_gguf_context * ctx);
2111
- WSP_GGML_API size_t wsp_gguf_get_data_offset(const struct wsp_gguf_context * ctx);
2112
- WSP_GGML_API void * wsp_gguf_get_data (const struct wsp_gguf_context * ctx);
2113
-
2114
- WSP_GGML_API int wsp_gguf_get_n_kv(const struct wsp_gguf_context * ctx);
2115
- WSP_GGML_API int wsp_gguf_find_key(const struct wsp_gguf_context * ctx, const char * key);
2116
- WSP_GGML_API const char * wsp_gguf_get_key (const struct wsp_gguf_context * ctx, int key_id);
2117
-
2118
- WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_kv_type (const struct wsp_gguf_context * ctx, int key_id);
2119
- WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_arr_type(const struct wsp_gguf_context * ctx, int key_id);
2120
-
2121
- // will abort if the wrong type is used for the key
2122
- WSP_GGML_API uint8_t wsp_gguf_get_val_u8 (const struct wsp_gguf_context * ctx, int key_id);
2123
- WSP_GGML_API int8_t wsp_gguf_get_val_i8 (const struct wsp_gguf_context * ctx, int key_id);
2124
- WSP_GGML_API uint16_t wsp_gguf_get_val_u16 (const struct wsp_gguf_context * ctx, int key_id);
2125
- WSP_GGML_API int16_t wsp_gguf_get_val_i16 (const struct wsp_gguf_context * ctx, int key_id);
2126
- WSP_GGML_API uint32_t wsp_gguf_get_val_u32 (const struct wsp_gguf_context * ctx, int key_id);
2127
- WSP_GGML_API int32_t wsp_gguf_get_val_i32 (const struct wsp_gguf_context * ctx, int key_id);
2128
- WSP_GGML_API float wsp_gguf_get_val_f32 (const struct wsp_gguf_context * ctx, int key_id);
2129
- WSP_GGML_API uint64_t wsp_gguf_get_val_u64 (const struct wsp_gguf_context * ctx, int key_id);
2130
- WSP_GGML_API int64_t wsp_gguf_get_val_i64 (const struct wsp_gguf_context * ctx, int key_id);
2131
- WSP_GGML_API double wsp_gguf_get_val_f64 (const struct wsp_gguf_context * ctx, int key_id);
2132
- WSP_GGML_API bool wsp_gguf_get_val_bool(const struct wsp_gguf_context * ctx, int key_id);
2133
- WSP_GGML_API const char * wsp_gguf_get_val_str (const struct wsp_gguf_context * ctx, int key_id);
2134
- WSP_GGML_API const void * wsp_gguf_get_val_data(const struct wsp_gguf_context * ctx, int key_id);
2135
- WSP_GGML_API int wsp_gguf_get_arr_n (const struct wsp_gguf_context * ctx, int key_id);
2136
- WSP_GGML_API const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int key_id);
2137
- WSP_GGML_API const char * wsp_gguf_get_arr_str (const struct wsp_gguf_context * ctx, int key_id, int i);
2138
-
2139
- WSP_GGML_API int wsp_gguf_get_n_tensors (const struct wsp_gguf_context * ctx);
2140
- WSP_GGML_API int wsp_gguf_find_tensor (const struct wsp_gguf_context * ctx, const char * name);
2141
- WSP_GGML_API size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i);
2142
- WSP_GGML_API char * wsp_gguf_get_tensor_name (const struct wsp_gguf_context * ctx, int i);
2143
- WSP_GGML_API enum wsp_ggml_type wsp_gguf_get_tensor_type (const struct wsp_gguf_context * ctx, int i);
2144
-
2145
- // removes key if it exists
2146
- WSP_GGML_API void wsp_gguf_remove_key(struct wsp_gguf_context * ctx, const char * key);
2147
-
2148
- // overrides existing values or adds a new one
2149
- WSP_GGML_API void wsp_gguf_set_val_u8 (struct wsp_gguf_context * ctx, const char * key, uint8_t val);
2150
- WSP_GGML_API void wsp_gguf_set_val_i8 (struct wsp_gguf_context * ctx, const char * key, int8_t val);
2151
- WSP_GGML_API void wsp_gguf_set_val_u16 (struct wsp_gguf_context * ctx, const char * key, uint16_t val);
2152
- WSP_GGML_API void wsp_gguf_set_val_i16 (struct wsp_gguf_context * ctx, const char * key, int16_t val);
2153
- WSP_GGML_API void wsp_gguf_set_val_u32 (struct wsp_gguf_context * ctx, const char * key, uint32_t val);
2154
- WSP_GGML_API void wsp_gguf_set_val_i32 (struct wsp_gguf_context * ctx, const char * key, int32_t val);
2155
- WSP_GGML_API void wsp_gguf_set_val_f32 (struct wsp_gguf_context * ctx, const char * key, float val);
2156
- WSP_GGML_API void wsp_gguf_set_val_u64 (struct wsp_gguf_context * ctx, const char * key, uint64_t val);
2157
- WSP_GGML_API void wsp_gguf_set_val_i64 (struct wsp_gguf_context * ctx, const char * key, int64_t val);
2158
- WSP_GGML_API void wsp_gguf_set_val_f64 (struct wsp_gguf_context * ctx, const char * key, double val);
2159
- WSP_GGML_API void wsp_gguf_set_val_bool(struct wsp_gguf_context * ctx, const char * key, bool val);
2160
- WSP_GGML_API void wsp_gguf_set_val_str (struct wsp_gguf_context * ctx, const char * key, const char * val);
2161
- WSP_GGML_API void wsp_gguf_set_arr_data(struct wsp_gguf_context * ctx, const char * key, enum wsp_gguf_type type, const void * data, int n);
2162
- WSP_GGML_API void wsp_gguf_set_arr_str (struct wsp_gguf_context * ctx, const char * key, const char ** data, int n);
2163
-
2164
- // set or add KV pairs from another context
2165
- WSP_GGML_API void wsp_gguf_set_kv(struct wsp_gguf_context * ctx, struct wsp_gguf_context * src);
2166
-
2167
- // manage tensor info
2168
- WSP_GGML_API void wsp_gguf_add_tensor(struct wsp_gguf_context * ctx, const struct wsp_ggml_tensor * tensor);
2169
- WSP_GGML_API void wsp_gguf_set_tensor_type(struct wsp_gguf_context * ctx, const char * name, enum wsp_ggml_type type);
2170
- WSP_GGML_API void wsp_gguf_set_tensor_data(struct wsp_gguf_context * ctx, const char * name, const void * data, size_t size);
2171
-
2172
- // writing gguf files can be done in 2 ways:
2173
- //
2174
- // - write the entire wsp_gguf_context to a binary file in a single pass:
2175
- //
2176
- // wsp_gguf_write_to_file(ctx, fname);
2177
- //
2178
- // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
2179
- //
2180
- // FILE * f = fopen(fname, "wb");
2181
- // fseek(f, wsp_gguf_get_meta_size(ctx), SEEK_SET);
2182
- // fwrite(f, ...);
2183
- // void * data = wsp_gguf_meta_get_meta_data(ctx);
2184
- // fseek(f, 0, SEEK_SET);
2185
- // fwrite(f, data, wsp_gguf_get_meta_size(ctx));
2186
- // free(data);
2187
- // fclose(f);
2188
- //
2189
-
2190
- // write the entire context to a binary file
2191
- WSP_GGML_API void wsp_gguf_write_to_file(const struct wsp_gguf_context * ctx, const char * fname, bool only_meta);
2192
-
2193
- // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
2194
- WSP_GGML_API size_t wsp_gguf_get_meta_size(const struct wsp_gguf_context * ctx);
2195
- WSP_GGML_API void wsp_gguf_get_meta_data(const struct wsp_gguf_context * ctx, void * data);
2196
-
2197
- #ifdef __cplusplus
2198
- // restrict not standard in C++
2199
- #define WSP_GGML_RESTRICT
2131
+ #ifdef __cplusplus
2132
+ // restrict not standard in C++
2133
+ # if defined(__GNUC__)
2134
+ # define WSP_GGML_RESTRICT __restrict__
2135
+ # elif defined(__clang__)
2136
+ # define WSP_GGML_RESTRICT __restrict
2137
+ # elif defined(_MSC_VER)
2138
+ # define WSP_GGML_RESTRICT __restrict
2139
+ # else
2140
+ # define WSP_GGML_RESTRICT
2141
+ # endif
2200
2142
  #else
2201
- #define WSP_GGML_RESTRICT restrict
2143
+ # define WSP_GGML_RESTRICT restrict
2202
2144
  #endif
2203
2145
  typedef void (*wsp_ggml_to_float_t) (const void * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
2204
2146
  typedef void (*wsp_ggml_from_float_t)(const float * WSP_GGML_RESTRICT x, void * WSP_GGML_RESTRICT y, int64_t k);
@@ -2215,6 +2157,37 @@ extern "C" {
2215
2157
 
2216
2158
  WSP_GGML_API const struct wsp_ggml_type_traits * wsp_ggml_get_type_traits(enum wsp_ggml_type type);
2217
2159
 
2160
+ // ggml threadpool
2161
+ // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
2162
+ // the goal should be to create an API that other backends can use and move everything to the ggml base
2163
+
2164
+ // scheduling priorities
2165
+ enum wsp_ggml_sched_priority {
2166
+ WSP_GGML_SCHED_PRIO_NORMAL,
2167
+ WSP_GGML_SCHED_PRIO_MEDIUM,
2168
+ WSP_GGML_SCHED_PRIO_HIGH,
2169
+ WSP_GGML_SCHED_PRIO_REALTIME
2170
+ };
2171
+
2172
+ // threadpool params
2173
+ // Use wsp_ggml_threadpool_params_default() or wsp_ggml_threadpool_params_init() to populate the defaults
2174
+ struct wsp_ggml_threadpool_params {
2175
+ bool cpumask[WSP_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
2176
+ int n_threads; // number of threads
2177
+ enum wsp_ggml_sched_priority prio; // thread priority
2178
+ uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
2179
+ bool strict_cpu; // strict cpu placement
2180
+ bool paused; // start in paused state
2181
+ };
2182
+
2183
+ struct wsp_ggml_threadpool; // forward declaration, see ggml.c
2184
+
2185
+ typedef struct wsp_ggml_threadpool * wsp_ggml_threadpool_t;
2186
+
2187
+ WSP_GGML_API struct wsp_ggml_threadpool_params wsp_ggml_threadpool_params_default(int n_threads);
2188
+ WSP_GGML_API void wsp_ggml_threadpool_params_init (struct wsp_ggml_threadpool_params * p, int n_threads);
2189
+ WSP_GGML_API bool wsp_ggml_threadpool_params_match (const struct wsp_ggml_threadpool_params * p0, const struct wsp_ggml_threadpool_params * p1);
2190
+
2218
2191
  #ifdef __cplusplus
2219
2192
  }
2220
2193
  #endif