whisper.rn 0.4.0-rc.10 → 0.4.0-rc.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +9 -3
- package/cpp/amx/amx.cpp +220 -0
- package/cpp/amx/amx.h +8 -0
- package/cpp/amx/common.h +91 -0
- package/cpp/amx/mmq.cpp +2511 -0
- package/cpp/amx/mmq.h +10 -0
- package/cpp/ggml-alloc.c +6 -14
- package/cpp/ggml-backend-impl.h +50 -11
- package/cpp/ggml-backend-reg.cpp +409 -31
- package/cpp/ggml-backend.cpp +9 -3
- package/cpp/ggml-backend.h +18 -0
- package/cpp/ggml-common.h +41 -43
- package/cpp/ggml-cpp.h +1 -0
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +941 -254
- package/cpp/ggml-cpu-aarch64.h +2 -24
- package/cpp/ggml-cpu-impl.h +171 -11
- package/cpp/ggml-cpu-quants.c +1812 -389
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +1432 -610
- package/cpp/ggml-cpu.cpp +131 -141
- package/cpp/ggml-cpu.h +10 -50
- package/cpp/ggml-impl.h +27 -11
- package/cpp/ggml-metal-impl.h +39 -0
- package/cpp/ggml-metal.h +1 -1
- package/cpp/ggml-metal.m +1031 -359
- package/cpp/ggml-opt.cpp +854 -0
- package/cpp/ggml-opt.h +216 -0
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml-whisper.metallib +0 -0
- package/cpp/ggml.c +501 -1537
- package/cpp/ggml.h +144 -171
- package/cpp/gguf.cpp +1329 -0
- package/cpp/gguf.h +202 -0
- package/cpp/whisper.cpp +254 -114
- package/cpp/whisper.h +6 -3
- package/lib/commonjs/version.json +1 -1
- package/lib/module/version.json +1 -1
- package/package.json +2 -1
- package/src/version.json +1 -1
- package/whisper-rn.podspec +2 -2
- package/cpp/README.md +0 -4
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
- package/cpp/ggml-backend.cpp.rej +0 -12
package/cpp/ggml.h
CHANGED
@@ -198,7 +198,7 @@
 
 #ifndef __GNUC__
 # define WSP_GGML_ATTRIBUTE_FORMAT(...)
-#elif defined(__MINGW32__)
+#elif defined(__MINGW32__) && !defined(__clang__)
 # define WSP_GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
 #else
 # define WSP_GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
@@ -237,13 +237,9 @@
 #define WSP_GGML_EXIT_SUCCESS 0
 #define WSP_GGML_EXIT_ABORTED 1
 
-#define WSP_GGML_ROPE_TYPE_NEOX 2
-
-#define WSP_GGUF_MAGIC "GGUF"
-
-#define WSP_GGUF_VERSION 3
-
-#define WSP_GGUF_DEFAULT_ALIGNMENT 32
+#define WSP_GGML_ROPE_TYPE_NEOX 2
+#define WSP_GGML_ROPE_TYPE_MROPE 8
+#define WSP_GGML_ROPE_TYPE_VISION 24
 
 #define WSP_GGML_UNUSED(x) (void)(x)
 
@@ -384,12 +380,15 @@ extern "C" {
         WSP_GGML_TYPE_F64 = 28,
         WSP_GGML_TYPE_IQ1_M = 29,
         WSP_GGML_TYPE_BF16 = 30,
-        WSP_GGML_TYPE_Q4_0_4_4 = 31,
-        WSP_GGML_TYPE_Q4_0_4_8 = 32,
-        WSP_GGML_TYPE_Q4_0_8_8 = 33,
+        // WSP_GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
+        // WSP_GGML_TYPE_Q4_0_4_8 = 32,
+        // WSP_GGML_TYPE_Q4_0_8_8 = 33,
         WSP_GGML_TYPE_TQ1_0 = 34,
         WSP_GGML_TYPE_TQ2_0 = 35,
-        WSP_GGML_TYPE_COUNT,
+        // WSP_GGML_TYPE_IQ4_NL_4_4 = 36,
+        // WSP_GGML_TYPE_IQ4_NL_4_8 = 37,
+        // WSP_GGML_TYPE_IQ4_NL_8_8 = 38,
+        WSP_GGML_TYPE_COUNT = 39,
     };
 
     // precision
@@ -398,12 +397,6 @@ extern "C" {
         WSP_GGML_PREC_F32,
     };
 
-    enum wsp_ggml_backend_type {
-        WSP_GGML_BACKEND_TYPE_CPU = 0,
-        WSP_GGML_BACKEND_TYPE_GPU = 10,
-        WSP_GGML_BACKEND_TYPE_GPU_SPLIT = 20,
-    };
-
     // model file types
     enum wsp_ggml_ftype {
         WSP_GGML_FTYPE_UNKNOWN = -1,
@@ -430,9 +423,6 @@ extern "C" {
         WSP_GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
         WSP_GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
-        WSP_GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
-        WSP_GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
-        WSP_GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
     };
 
     // available tensor operations:
@@ -496,6 +486,7 @@ extern "C" {
         WSP_GGML_OP_POOL_2D_BACK,
         WSP_GGML_OP_UPSCALE, // nearest interpolate
         WSP_GGML_OP_PAD,
+        WSP_GGML_OP_PAD_REFLECT_1D,
         WSP_GGML_OP_ARANGE,
         WSP_GGML_OP_TIMESTEP_EMBEDDING,
         WSP_GGML_OP_ARGSORT,
@@ -510,6 +501,7 @@ extern "C" {
         WSP_GGML_OP_GET_REL_POS,
         WSP_GGML_OP_ADD_REL_POS,
         WSP_GGML_OP_RWKV_WKV6,
+        WSP_GGML_OP_GATED_LINEAR_ATTN,
 
         WSP_GGML_OP_UNARY,
 
@@ -584,8 +576,6 @@ extern "C" {
     struct wsp_ggml_tensor {
         enum wsp_ggml_type type;
 
-        WSP_GGML_DEPRECATED(enum wsp_ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
-
         struct wsp_ggml_backend_buffer * buffer;
 
         int64_t ne[WSP_GGML_MAX_DIMS]; // number of elements
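The `backend` field is dropped from `struct wsp_ggml_tensor`; per the removed deprecation note, the owning buffer is now the way to tell where a tensor lives. A minimal sketch, assuming the `wsp_ggml_backend_buffer_name()` / `wsp_ggml_backend_buffer_is_host()` helpers from `ggml-backend.h` (not part of this hunk) keep their usual signatures:

```c
#include <stdio.h>
#include "ggml.h"
#include "ggml-backend.h"

// Replacement for code that used to read tensor->backend:
// inspect the buffer the tensor was allocated in instead.
static void print_tensor_location(const struct wsp_ggml_tensor * t) {
    if (t->buffer == NULL) {
        printf("%s: no backend buffer (data not allocated yet)\n", t->name);
        return;
    }
    printf("%s: buffer=%s, host=%d\n",
           t->name,
           wsp_ggml_backend_buffer_name(t->buffer),
           wsp_ggml_backend_buffer_is_host(t->buffer));
}
```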
@@ -1394,16 +1384,20 @@ extern "C" {
             float scale,
             float max_bias);
 
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back(
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_back(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a,
-            struct wsp_ggml_tensor * b);
+            struct wsp_ggml_tensor * b,
+            float scale,
+            float max_bias);
 
     // in-place, returns view(a)
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_back_inplace(
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_soft_max_ext_back_inplace(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a,
-            struct wsp_ggml_tensor * b);
+            struct wsp_ggml_tensor * b,
+            float scale,
+            float max_bias);
 
     // rotary position embedding
     // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
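Both backward soft-max helpers now carry the same `scale` / `max_bias` pair as `wsp_ggml_soft_max_ext`. A minimal sketch of a call site, assuming (as in upstream ggml) that `a` is the incoming gradient, `b` is the forward soft-max result, and the two scalars must match the forward call:

```c
#include "ggml.h"

// dy: gradient of the soft-max output, y: output of the forward soft-max
static struct wsp_ggml_tensor * soft_max_grad(struct wsp_ggml_context * ctx,
                                              struct wsp_ggml_tensor * dy,
                                              struct wsp_ggml_tensor * y) {
    // 1.0f / 0.0f corresponds to a plain (unscaled, no-ALiBi) soft-max
    return wsp_ggml_soft_max_ext_back(ctx, dy, y, 1.0f, 0.0f);
}
```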
@@ -1442,6 +1436,22 @@ extern "C" {
             float beta_fast,
             float beta_slow);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_multi(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b,
+            struct wsp_ggml_tensor * c,
+            int n_dims,
+            int sections[4],
+            int mode,
+            int n_ctx_orig,
+            float freq_base,
+            float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow);
+
     // in-place, returns view(a)
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_ext_inplace(
             struct wsp_ggml_context * ctx,
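`wsp_ggml_rope_multi` is the multi-section (M-RoPE) variant that pairs with the `WSP_GGML_ROPE_TYPE_MROPE` / `WSP_GGML_ROPE_TYPE_VISION` mode flags added earlier in this diff. A hedged sketch of a call; the `sections` split, `n_dims`, and the frequency parameters below are placeholders, not values whisper.cpp actually uses:

```c
#include <stddef.h>
#include "ggml.h"

static struct wsp_ggml_tensor * rope_multi_example(struct wsp_ggml_context * ctx,
                                                   struct wsp_ggml_tensor * cur,  // activations to rotate
                                                   struct wsp_ggml_tensor * pos)  // position ids
{
    int sections[4] = {16, 24, 24, 0}; // per-section split of the rotated dims (placeholder values)
    return wsp_ggml_rope_multi(ctx, cur, pos,
                               NULL,                      // c: frequency factors (assumed optional, as in wsp_ggml_rope_ext)
                               64,                        // n_dims (placeholder)
                               sections,
                               WSP_GGML_ROPE_TYPE_MROPE,  // mode
                               0,                         // n_ctx_orig
                               10000.0f, 1.0f,            // freq_base, freq_scale
                               0.0f, 1.0f,                // ext_factor, attn_factor
                               32.0f, 1.0f);              // beta_fast, beta_slow
}
```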
@@ -1494,7 +1504,7 @@ extern "C" {
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_back(
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_ext_back(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a, // gradients of wsp_ggml_rope result
             struct wsp_ggml_tensor * b, // positions
@@ -1509,6 +1519,23 @@ extern "C" {
             float beta_fast,
             float beta_slow);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_rope_multi_back(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            struct wsp_ggml_tensor * b,
+            struct wsp_ggml_tensor * c,
+            int n_dims,
+            int sections[4],
+            int mode,
+            int n_ctx_orig,
+            float freq_base,
+            float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow);
+
+
     // clamp
     // in-place, returns view(a)
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_clamp(
@@ -1545,17 +1572,6 @@ extern "C" {
             int d1, // dilation dimension 1
             bool is_2D);
 
-    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_depthwise_2d(
-            struct wsp_ggml_context * ctx,
-            struct wsp_ggml_tensor * a, // convolution kernel
-            struct wsp_ggml_tensor * b, // data
-            int s0, // stride dimension 0
-            int s1, // stride dimension 1
-            int p0, // padding dimension 0
-            int p1, // padding dimension 1
-            int d0, // dilation dimension 0
-            int d1); // dilation dimension 1
-
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a, // convolution kernel
@@ -1573,6 +1589,23 @@ extern "C" {
             int s, // stride
             int d); // dilation
 
+    // depthwise
+    // TODO: this is very likely wrong for some cases! - needs more testing
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d_dw(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a, // convolution kernel
+            struct wsp_ggml_tensor * b, // data
+            int s0, // stride
+            int p0, // padding
+            int d0); // dilation
+
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_1d_dw_ph(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a, // convolution kernel
+            struct wsp_ggml_tensor * b, // data
+            int s0, // stride
+            int d0); // dilation
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_transpose_1d(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a, // convolution kernel
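Two depthwise 1-D convolution entry points are added; the header's own TODO flags them as not fully tested. A minimal sketch of how the explicit-padding form might be called; kernel and data shapes are left to the caller since the exact layout contract is not spelled out in this diff:

```c
#include "ggml.h"

static struct wsp_ggml_tensor * conv_1d_dw_example(struct wsp_ggml_context * ctx,
                                                   struct wsp_ggml_tensor * w,  // depthwise kernel (one filter per channel)
                                                   struct wsp_ggml_tensor * x)  // input data
{
    // stride 1, padding 1, dilation 1; wsp_ggml_conv_1d_dw_ph() is presumably the
    // implicit ("pad-half") variant, mirroring wsp_ggml_conv_1d_ph()
    return wsp_ggml_conv_1d_dw(ctx, w, x, /*s0*/ 1, /*p0*/ 1, /*d0*/ 1);
}
```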
@@ -1592,7 +1625,6 @@ extern "C" {
             int d0, // dilation dimension 0
             int d1); // dilation dimension 1
 
-
     // kernel size is a->ne[0] x a->ne[1]
     // stride is equal to kernel size
     // padding is zero
@@ -1619,6 +1651,18 @@ extern "C" {
             struct wsp_ggml_tensor * a,
             struct wsp_ggml_tensor * b);
 
+    // depthwise
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_2d_dw(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a, // convolution kernel
+            struct wsp_ggml_tensor * b, // data
+            int s0, // stride dimension 0
+            int s1, // stride dimension 1
+            int p0, // padding dimension 0
+            int p1, // padding dimension 1
+            int d0, // dilation dimension 0
+            int d1); // dilation dimension 1
+
     WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_conv_transpose_2d_p0(
             struct wsp_ggml_context * ctx,
             struct wsp_ggml_tensor * a,
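`wsp_ggml_conv_2d_dw` takes exactly the parameter list of the `wsp_ggml_conv_depthwise_2d` declaration removed earlier in this diff, so it reads as a rename of that operator; existing call sites would presumably migrate one-to-one (not verified here):

```c
#include "ggml.h"

static struct wsp_ggml_tensor * conv_2d_dw_example(struct wsp_ggml_context * ctx,
                                                   struct wsp_ggml_tensor * w,  // depthwise kernel
                                                   struct wsp_ggml_tensor * x)  // data
{
    // stride 1x1, padding 1x1, dilation 1x1
    return wsp_ggml_conv_2d_dw(ctx, w, x, 1, 1, 1, 1, 1, 1);
}
```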
@@ -1692,6 +1736,13 @@ extern "C" {
             int p2,
             int p3);
 
+    // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_pad_reflect_1d(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * a,
+            int p0,
+            int p1);
+
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
     // return: [N, dim]
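`wsp_ggml_pad_reflect_1d` reflect-pads dimension 0, as the header comment illustrates: [a, b, c, d] with one element of padding on each side becomes [b, a, b, c, d, c]. A one-line usage sketch:

```c
#include "ggml.h"

static struct wsp_ggml_tensor * reflect_pad_example(struct wsp_ggml_context * ctx,
                                                    struct wsp_ggml_tensor * x) {
    // one element of reflection padding before and after dim 0
    return wsp_ggml_pad_reflect_1d(ctx, x, /*p0*/ 1, /*p1*/ 1);
}
```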
@@ -1724,7 +1775,7 @@ extern "C" {
             struct wsp_ggml_tensor * a,
             int k);
 
-#define WSP_GGML_KQ_MASK_PAD 32
+#define WSP_GGML_KQ_MASK_PAD 64
 
     // q: [n_embd, n_batch, n_head, 1]
     // k: [n_embd, n_kv, n_head_kv, 1]
@@ -1830,6 +1881,15 @@ extern "C" {
             struct wsp_ggml_tensor * td,
             struct wsp_ggml_tensor * state);
 
+    WSP_GGML_API struct wsp_ggml_tensor * wsp_ggml_gated_linear_attn(
+            struct wsp_ggml_context * ctx,
+            struct wsp_ggml_tensor * k,
+            struct wsp_ggml_tensor * v,
+            struct wsp_ggml_tensor * q,
+            struct wsp_ggml_tensor * g,
+            struct wsp_ggml_tensor * state,
+            float scale);
+
     // custom operators
 
     typedef void (*wsp_ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -2068,137 +2128,19 @@ extern "C" {
             int64_t n_per_row,
             const float * imatrix);
 
-
-    //
-    // gguf
-    //
-    enum wsp_gguf_type {
-        WSP_GGUF_TYPE_UINT8 = 0,
-        WSP_GGUF_TYPE_INT8 = 1,
-        WSP_GGUF_TYPE_UINT16 = 2,
-        WSP_GGUF_TYPE_INT16 = 3,
-        WSP_GGUF_TYPE_UINT32 = 4,
-        WSP_GGUF_TYPE_INT32 = 5,
-        WSP_GGUF_TYPE_FLOAT32 = 6,
-        WSP_GGUF_TYPE_BOOL = 7,
-        WSP_GGUF_TYPE_STRING = 8,
-        WSP_GGUF_TYPE_ARRAY = 9,
-        WSP_GGUF_TYPE_UINT64 = 10,
-        WSP_GGUF_TYPE_INT64 = 11,
-        WSP_GGUF_TYPE_FLOAT64 = 12,
-        WSP_GGUF_TYPE_COUNT, // marks the end of the enum
-    };
-
-    struct wsp_gguf_context;
-
-    struct wsp_gguf_init_params {
-        bool no_alloc;
-
-        // if not NULL, create a wsp_ggml_context and allocate the tensor data in it
-        struct wsp_ggml_context ** ctx;
-    };
-
-    WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_empty(void);
-    WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_file(const char * fname, struct wsp_gguf_init_params params);
-    //WSP_GGML_API struct wsp_gguf_context * wsp_gguf_init_from_buffer(..);
-
-    WSP_GGML_API void wsp_gguf_free(struct wsp_gguf_context * ctx);
-
-    WSP_GGML_API const char * wsp_gguf_type_name(enum wsp_gguf_type type);
-
-    WSP_GGML_API int wsp_gguf_get_version (const struct wsp_gguf_context * ctx);
-    WSP_GGML_API size_t wsp_gguf_get_alignment (const struct wsp_gguf_context * ctx);
-    WSP_GGML_API size_t wsp_gguf_get_data_offset(const struct wsp_gguf_context * ctx);
-    WSP_GGML_API void * wsp_gguf_get_data (const struct wsp_gguf_context * ctx);
-
-    WSP_GGML_API int wsp_gguf_get_n_kv(const struct wsp_gguf_context * ctx);
-    WSP_GGML_API int wsp_gguf_find_key(const struct wsp_gguf_context * ctx, const char * key);
-    WSP_GGML_API const char * wsp_gguf_get_key (const struct wsp_gguf_context * ctx, int key_id);
-
-    WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_kv_type (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API enum wsp_gguf_type wsp_gguf_get_arr_type(const struct wsp_gguf_context * ctx, int key_id);
-
-    // will abort if the wrong type is used for the key
-    WSP_GGML_API uint8_t wsp_gguf_get_val_u8 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API int8_t wsp_gguf_get_val_i8 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API uint16_t wsp_gguf_get_val_u16 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API int16_t wsp_gguf_get_val_i16 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API uint32_t wsp_gguf_get_val_u32 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API int32_t wsp_gguf_get_val_i32 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API float wsp_gguf_get_val_f32 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API uint64_t wsp_gguf_get_val_u64 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API int64_t wsp_gguf_get_val_i64 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API double wsp_gguf_get_val_f64 (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API bool wsp_gguf_get_val_bool(const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API const char * wsp_gguf_get_val_str (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API const void * wsp_gguf_get_val_data(const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API int wsp_gguf_get_arr_n (const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API const void * wsp_gguf_get_arr_data(const struct wsp_gguf_context * ctx, int key_id);
-    WSP_GGML_API const char * wsp_gguf_get_arr_str (const struct wsp_gguf_context * ctx, int key_id, int i);
-
-    WSP_GGML_API int wsp_gguf_get_n_tensors (const struct wsp_gguf_context * ctx);
-    WSP_GGML_API int wsp_gguf_find_tensor (const struct wsp_gguf_context * ctx, const char * name);
-    WSP_GGML_API size_t wsp_gguf_get_tensor_offset(const struct wsp_gguf_context * ctx, int i);
-    WSP_GGML_API char * wsp_gguf_get_tensor_name (const struct wsp_gguf_context * ctx, int i);
-    WSP_GGML_API enum wsp_ggml_type wsp_gguf_get_tensor_type (const struct wsp_gguf_context * ctx, int i);
-
-    // removes key if it exists
-    WSP_GGML_API void wsp_gguf_remove_key(struct wsp_gguf_context * ctx, const char * key);
-
-    // overrides existing values or adds a new one
-    WSP_GGML_API void wsp_gguf_set_val_u8 (struct wsp_gguf_context * ctx, const char * key, uint8_t val);
-    WSP_GGML_API void wsp_gguf_set_val_i8 (struct wsp_gguf_context * ctx, const char * key, int8_t val);
-    WSP_GGML_API void wsp_gguf_set_val_u16 (struct wsp_gguf_context * ctx, const char * key, uint16_t val);
-    WSP_GGML_API void wsp_gguf_set_val_i16 (struct wsp_gguf_context * ctx, const char * key, int16_t val);
-    WSP_GGML_API void wsp_gguf_set_val_u32 (struct wsp_gguf_context * ctx, const char * key, uint32_t val);
-    WSP_GGML_API void wsp_gguf_set_val_i32 (struct wsp_gguf_context * ctx, const char * key, int32_t val);
-    WSP_GGML_API void wsp_gguf_set_val_f32 (struct wsp_gguf_context * ctx, const char * key, float val);
-    WSP_GGML_API void wsp_gguf_set_val_u64 (struct wsp_gguf_context * ctx, const char * key, uint64_t val);
-    WSP_GGML_API void wsp_gguf_set_val_i64 (struct wsp_gguf_context * ctx, const char * key, int64_t val);
-    WSP_GGML_API void wsp_gguf_set_val_f64 (struct wsp_gguf_context * ctx, const char * key, double val);
-    WSP_GGML_API void wsp_gguf_set_val_bool(struct wsp_gguf_context * ctx, const char * key, bool val);
-    WSP_GGML_API void wsp_gguf_set_val_str (struct wsp_gguf_context * ctx, const char * key, const char * val);
-    WSP_GGML_API void wsp_gguf_set_arr_data(struct wsp_gguf_context * ctx, const char * key, enum wsp_gguf_type type, const void * data, int n);
-    WSP_GGML_API void wsp_gguf_set_arr_str (struct wsp_gguf_context * ctx, const char * key, const char ** data, int n);
-
-    // set or add KV pairs from another context
-    WSP_GGML_API void wsp_gguf_set_kv(struct wsp_gguf_context * ctx, struct wsp_gguf_context * src);
-
-    // manage tensor info
-    WSP_GGML_API void wsp_gguf_add_tensor(struct wsp_gguf_context * ctx, const struct wsp_ggml_tensor * tensor);
-    WSP_GGML_API void wsp_gguf_set_tensor_type(struct wsp_gguf_context * ctx, const char * name, enum wsp_ggml_type type);
-    WSP_GGML_API void wsp_gguf_set_tensor_data(struct wsp_gguf_context * ctx, const char * name, const void * data, size_t size);
-
-    // writing gguf files can be done in 2 ways:
-    //
-    // - write the entire wsp_gguf_context to a binary file in a single pass:
-    //
-    //   wsp_gguf_write_to_file(ctx, fname);
-    //
-    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
-    //
-    //   FILE * f = fopen(fname, "wb");
-    //   fseek(f, wsp_gguf_get_meta_size(ctx), SEEK_SET);
-    //   fwrite(f, ...);
-    //   void * data = wsp_gguf_meta_get_meta_data(ctx);
-    //   fseek(f, 0, SEEK_SET);
-    //   fwrite(f, data, wsp_gguf_get_meta_size(ctx));
-    //   free(data);
-    //   fclose(f);
-    //
-
-    // write the entire context to a binary file
-    WSP_GGML_API void wsp_gguf_write_to_file(const struct wsp_gguf_context * ctx, const char * fname, bool only_meta);
-
-    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-    WSP_GGML_API size_t wsp_gguf_get_meta_size(const struct wsp_gguf_context * ctx);
-    WSP_GGML_API void wsp_gguf_get_meta_data(const struct wsp_gguf_context * ctx, void * data);
-
-#ifdef __cplusplus
-// restrict not standard in C++
-#define WSP_GGML_RESTRICT
+#ifdef __cplusplus
+// restrict not standard in C++
+# if defined(__GNUC__)
+# define WSP_GGML_RESTRICT __restrict__
+# elif defined(__clang__)
+# define WSP_GGML_RESTRICT __restrict
+# elif defined(_MSC_VER)
+# define WSP_GGML_RESTRICT __restrict
+# else
+# define WSP_GGML_RESTRICT
+# endif
 #else
-#define WSP_GGML_RESTRICT restrict
+# define WSP_GGML_RESTRICT restrict
 #endif
     typedef void (*wsp_ggml_to_float_t) (const void * WSP_GGML_RESTRICT x, float * WSP_GGML_RESTRICT y, int64_t k);
     typedef void (*wsp_ggml_from_float_t)(const float * WSP_GGML_RESTRICT x, void * WSP_GGML_RESTRICT y, int64_t k);
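The entire `wsp_gguf_*` surface leaves ggml.h; per the file list at the top of this diff, it now lives in the new package/cpp/gguf.h and gguf.cpp. A hedged sketch of a reader, assuming the moved declarations keep roughly the shape shown in the removed block above (exact signatures in the new gguf.h may differ, e.g. wider index types):

```c
#include <stdio.h>
#include "ggml.h"
#include "gguf.h"  // new home of the wsp_gguf_* declarations in this release

static void dump_gguf_keys(const char * fname) {
    struct wsp_gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ NULL };

    struct wsp_gguf_context * ctx = wsp_gguf_init_from_file(fname, params);
    if (ctx == NULL) {
        return;
    }
    for (int i = 0; i < (int) wsp_gguf_get_n_kv(ctx); i++) {
        printf("%s\n", wsp_gguf_get_key(ctx, i));
    }
    wsp_gguf_free(ctx);
}
```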
@@ -2215,6 +2157,37 @@ extern "C" {
 
     WSP_GGML_API const struct wsp_ggml_type_traits * wsp_ggml_get_type_traits(enum wsp_ggml_type type);
 
+    // ggml threadpool
+    // TODO: currently, only a few functions are in the base ggml API, while the rest are in the CPU backend
+    // the goal should be to create an API that other backends can use move everything to the ggml base
+
+    // scheduling priorities
+    enum wsp_ggml_sched_priority {
+        WSP_GGML_SCHED_PRIO_NORMAL,
+        WSP_GGML_SCHED_PRIO_MEDIUM,
+        WSP_GGML_SCHED_PRIO_HIGH,
+        WSP_GGML_SCHED_PRIO_REALTIME
+    };
+
+    // threadpool params
+    // Use wsp_ggml_threadpool_params_default() or wsp_ggml_threadpool_params_init() to populate the defaults
+    struct wsp_ggml_threadpool_params {
+        bool cpumask[WSP_GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
+        int n_threads; // number of threads
+        enum wsp_ggml_sched_priority prio; // thread priority
+        uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
+        bool strict_cpu; // strict cpu placement
+        bool paused; // start in paused state
+    };
+
+    struct wsp_ggml_threadpool; // forward declaration, see ggml.c
+
+    typedef struct wsp_ggml_threadpool * wsp_ggml_threadpool_t;
+
+    WSP_GGML_API struct wsp_ggml_threadpool_params wsp_ggml_threadpool_params_default(int n_threads);
+    WSP_GGML_API void wsp_ggml_threadpool_params_init (struct wsp_ggml_threadpool_params * p, int n_threads);
+    WSP_GGML_API bool wsp_ggml_threadpool_params_match (const struct wsp_ggml_threadpool_params * p0, const struct wsp_ggml_threadpool_params * p1);
+
 #ifdef __cplusplus
 }
 #endif
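The scheduling-priority enum and threadpool parameter struct move into the base API here; per the TODO above, creating and attaching the pool itself is assumed to stay in the CPU backend header. A small sketch using only the declarations added in this hunk:

```c
#include "ggml.h"

static struct wsp_ggml_threadpool_params make_threadpool_params(int n_threads) {
    // start from the library defaults, then tweak a few fields
    struct wsp_ggml_threadpool_params p = wsp_ggml_threadpool_params_default(n_threads);
    p.prio       = WSP_GGML_SCHED_PRIO_HIGH; // scheduling priority
    p.poll       = 0;                        // 0 = no polling, 100 = aggressive polling
    p.strict_cpu = false;                    // no strict cpu placement
    return p;
}
```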