@fugood/llama.node 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +12 -12
- package/src/llama.cpp/CMakeLists.txt +0 -1
- package/src/llama.cpp/common/arg.cpp +17 -0
- package/src/llama.cpp/common/chat.cpp +37 -20
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.h +4 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -2
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
- package/src/llama.cpp/ggml/include/ggml.h +181 -10
- package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +38 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1297 -211
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +103 -9
- package/src/llama.cpp/include/llama.h +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +108 -2
- package/src/llama.cpp/src/llama-arch.h +7 -0
- package/src/llama.cpp/src/llama-batch.cpp +27 -1
- package/src/llama.cpp/src/llama-batch.h +8 -1
- package/src/llama.cpp/src/llama-chat.cpp +15 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +95 -81
- package/src/llama.cpp/src/llama-graph.h +43 -16
- package/src/llama.cpp/src/llama-hparams.cpp +2 -1
- package/src/llama.cpp/src/llama-hparams.h +1 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
- package/src/llama.cpp/src/llama-kv-cache-unified.h +62 -24
- package/src/llama.cpp/src/llama-kv-cells.h +62 -10
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
- package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +34 -16
- package/src/llama.cpp/src/llama-memory.cpp +17 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +1374 -210
- package/src/llama.cpp/src/llama-model.h +3 -0
- package/src/llama.cpp/src/llama-vocab.cpp +8 -1
- package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
package/src/llama.cpp/ggml/include/ggml.h

@@ -314,6 +314,13 @@
 extern "C" {
 #endif

+    // Function type used in fatal error callbacks
+    typedef void (*ggml_abort_callback_t)(const char * error_message);
+
+    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+    // Returns the old callback for chaining
+    GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
+
     GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
     GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
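The abort-callback hook above is the kind of API an embedder calls once at startup. A minimal sketch, assuming only the declarations in this hunk (the handler and installer names are illustrative, not part of the package):

```c
#include "ggml.h"
#include <stdio.h>

// Illustrative handler: route ggml fatal errors to stderr rather than stdout.
static void on_ggml_abort(const char * error_message) {
    fprintf(stderr, "ggml fatal: %s\n", error_message);
}

static void install_abort_hook(void) {
    // The old callback is returned so handlers can be chained;
    // passing NULL restores the default print-to-stdout behavior.
    ggml_abort_callback_t prev = ggml_set_abort_callback(on_ggml_abort);
    (void) prev; // chain to prev here if desired
}
```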
@@ -470,6 +477,7 @@ extern "C" {
         GGML_OP_TRANSPOSE,
         GGML_OP_GET_ROWS,
         GGML_OP_GET_ROWS_BACK,
+        GGML_OP_SET_ROWS,
         GGML_OP_DIAG,
         GGML_OP_DIAG_MASK_INF,
         GGML_OP_DIAG_MASK_ZERO,
@@ -481,12 +489,13 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
         GGML_OP_POOL_2D_BACK,
-        GGML_OP_UPSCALE, // nearest interpolate
+        GGML_OP_UPSCALE,
         GGML_OP_PAD,
         GGML_OP_PAD_REFLECT_1D,
         GGML_OP_ROLL,
@@ -519,6 +528,8 @@ extern "C" {
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
         GGML_OP_OPT_STEP_ADAMW,

+        GGML_OP_GLU,
+
         GGML_OP_COUNT,
     };

@@ -542,6 +553,16 @@ extern "C" {
         GGML_UNARY_OP_COUNT,
     };

+    enum ggml_glu_op {
+        GGML_GLU_OP_REGLU,
+        GGML_GLU_OP_GEGLU,
+        GGML_GLU_OP_SWIGLU,
+        GGML_GLU_OP_GEGLU_ERF,
+        GGML_GLU_OP_GEGLU_QUICK,
+
+        GGML_GLU_OP_COUNT,
+    };
+
     enum ggml_object_type {
         GGML_OBJECT_TYPE_TENSOR,
         GGML_OBJECT_TYPE_GRAPH,

@@ -627,6 +648,9 @@ extern "C" {

     // misc

+    GGML_API const char * ggml_version(void);
+    GGML_API const char * ggml_commit(void);
+
     GGML_API void ggml_time_init(void); // call this once at the beginning of the program
     GGML_API int64_t ggml_time_ms(void);
     GGML_API int64_t ggml_time_us(void);
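The new build-info accessors are straightforward; a one-function sketch of how a host might surface them (the function name is illustrative):

```c
#include "ggml.h"
#include <stdio.h>

// Report which ggml build the addon is linked against.
static void print_ggml_build_info(void) {
    printf("ggml %s (commit %s)\n", ggml_version(), ggml_commit());
}
```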
@@ -657,6 +681,7 @@ extern "C" {
     GGML_API const char * ggml_op_symbol(enum ggml_op op);

     GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+    GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
     GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name

     GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);

@@ -687,6 +712,9 @@ extern "C" {
     // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
     GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);

+    // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
+    GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
+
     GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
     GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
@@ -758,6 +786,7 @@ extern "C" {
     GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);

     GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+    GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);

     GGML_API void * ggml_get_data(const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

@@ -1086,6 +1115,89 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);

+    // gated linear unit ops
+    // A: n columns, r rows,
+    // result is n / 2 columns, r rows,
+    // expects gate in second half of row, unless swapped is true
+    GGML_API struct ggml_tensor * ggml_glu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            enum ggml_glu_op op,
+            bool swapped);
+
+    GGML_API struct ggml_tensor * ggml_reglu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_reglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_swiglu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_swiglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // A: n columns, r rows,
+    // B: n columns, r rows,
+    GGML_API struct ggml_tensor * ggml_glu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            enum ggml_glu_op op);
+
+    GGML_API struct ggml_tensor * ggml_reglu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_swiglu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
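To make the new GLU surface concrete, a minimal sketch under the comments above: the fused forms split each row in half (with the gate in the second half unless `swapped`), while the `_split` forms take the two operands as separate same-shape tensors. Tensor names here are illustrative:

```c
#include "ggml.h"

// Fused SwiGLU: `cur` is [n, r] with the gate packed into the second half
// of each row; the result is [n / 2, r].
static struct ggml_tensor * ffn_swiglu_fused(struct ggml_context * ctx,
                                             struct ggml_tensor * cur) {
    return ggml_swiglu(ctx, cur);
}

// Split SwiGLU: `a` and `b` are separate tensors of identical shape
// (n columns, r rows each), as documented above.
static struct ggml_tensor * ffn_swiglu_split(struct ggml_context * ctx,
                                             struct ggml_tensor * a,
                                             struct ggml_tensor * b) {
    return ggml_swiglu_split(ctx, a, b);
}
```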
@@ -1375,6 +1487,23 @@ extern "C" {
             struct ggml_tensor * b,  // row indices
             struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape

+    // a TD  [n_embd, ne1,    ne2,  ne3]
+    // b TS  [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
+    // c I64 [n_rows, ne11,   ne12, 1]    | c[i] in [0, ne1)
+    //
+    // undefined behavior if destination rows overlap
+    //
+    // broadcast:
+    //   ne2 % ne11 == 0
+    //   ne3 % ne12 == 0
+    //
+    // return view(a)
+    GGML_API struct ggml_tensor * ggml_set_rows(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,  // destination
+            struct ggml_tensor * b,  // source
+            struct ggml_tensor * c); // row indices
+
     GGML_API struct ggml_tensor * ggml_diag(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
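A sketch of the new scatter primitive, following the contract documented in the hunk (names are illustrative); `GGML_OP_SET_ROWS` also appears below in the CPU backend's forward dispatch and task scheduling:

```c
#include "ggml.h"

// Scatter the rows of `src` into `dst` at the positions listed in `idx`.
static struct ggml_tensor * scatter_rows(struct ggml_context * ctx,
                                         struct ggml_tensor * dst,   // [n_embd, ne1, ne2, ne3]
                                         struct ggml_tensor * src,   // [n_embd, n_rows, ne2, ne3]
                                         struct ggml_tensor * idx) { // I64, values in [0, ne1)
    // Returns a view of dst; overlapping destination rows are undefined behavior.
    return ggml_set_rows(ctx, dst, src, idx);
}
```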
@@ -1412,8 +1541,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);

+    // a    [ne0, ne01, ne02, ne03]
+    // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
+    //
+    // broadcast:
+    //   ne02 % ne12 == 0
+    //   ne03 % ne13 == 0
+    //
     // fused soft_max(a*scale + mask*(ALiBi slope))
-    // mask is optional
     // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
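A sketch of a call that exercises the newly documented mask broadcasting; `ggml_soft_max_ext` itself predates this diff (only the contract comments are new), so the full parameter list here is an assumption from upstream llama.cpp:

```c
#include "ggml.h"

// Fused softmax of attention scores; mask may be NULL, and its dims 2 and 3
// may broadcast (ne02 % ne12 == 0, ne03 % ne13 == 0) per the comments above.
static struct ggml_tensor * masked_softmax(struct ggml_context * ctx,
                                           struct ggml_tensor * kq,   // [ne0, ne01, ne02, ne03]
                                           struct ggml_tensor * mask, // F16 or F32, optional
                                           float scale) {
    return ggml_soft_max_ext(ctx, kq, mask, scale, /*max_bias=*/0.0f);
}
```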
@@ -1723,6 +1858,17 @@ extern "C" {
             struct ggml_tensor * b,
             int stride);

+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,  // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor * b,  // input data [W, H, C, N]
+            int s0,  // stride dimension 0
+            int s1,  // stride dimension 1
+            int p0,  // padding dimension 0
+            int p1,  // padding dimension 1
+            int d0,  // dilation dimension 0
+            int d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
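A sketch of the new single-op convolution entry point (as opposed to composing `ggml_im2col` with a matmul), using the layouts from the signature above; the wrapper name is illustrative:

```c
#include "ggml.h"

// 3x3 "same" convolution: stride 1, padding 1, dilation 1.
static struct ggml_tensor * conv3x3_same(struct ggml_context * ctx,
                                         struct ggml_tensor * kernel,  // [3, 3, IC, OC]
                                         struct ggml_tensor * input) { // [W, H, IC, N]
    return ggml_conv_2d_direct(ctx, kernel, input,
                               /*s0=*/1, /*s1=*/1,
                               /*p0=*/1, /*p1=*/1,
                               /*d0=*/1, /*d1=*/1);
}
```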
@@ -1765,6 +1911,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };

     // interpolate

@@ -1777,14 +1929,26 @@ extern "C" {

     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int ne0,
             int ne1,
             int ne2,
             int ne3,
-            enum ggml_scale_mode mode);
+            enum ggml_scale_mode mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3,
+            uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]

     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
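The deprecation points callers at `ggml_interpolate`, whose `mode` word can also carry a `ggml_scale_flag`. A migration sketch, assuming a [W, H, C, N] image tensor:

```c
#include "ggml.h"

// Bilinear resize of the first two dimensions to new_w x new_h.
static struct ggml_tensor * resize_bilinear(struct ggml_context * ctx,
                                            struct ggml_tensor * img, // [W, H, C, N]
                                            int64_t new_w,
                                            int64_t new_h) {
    const uint32_t mode = GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS;
    return ggml_interpolate(ctx, img, new_w, new_h, img->ne[2], img->ne[3], mode);
}
```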
@@ -1847,11 +2011,17 @@ extern "C" {

 #define GGML_KQ_MASK_PAD 64

-    // q:    [n_embd_k, n_batch,     n_head,
-    // k:    [n_embd_k, n_kv,        n_head_kv,
-    // v:    [n_embd_v, n_kv,        n_head_kv,
-    // mask: [n_kv,     n_batch_pad,
-    // res:  [n_embd_v, n_head,      n_batch,
+    // q:    [n_embd_k, n_batch,     n_head,    ne3 ]
+    // k:    [n_embd_k, n_kv,        n_head_kv, ne3 ]
+    // v:    [n_embd_v, n_kv,        n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv,     n_batch_pad, ne32,      ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
+    // res:  [n_embd_v, n_head,      n_batch,   ne3 ] !! permuted !!
+    //
+    // broadcast:
+    //   n_head % n_head_kv == 0
+    //   n_head % ne32      == 0
+    //   ne3    % ne33      == 0
+    //
     GGML_API struct ggml_tensor * ggml_flash_attn_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * q,
@@ -1890,7 +2060,8 @@ extern "C" {
             struct ggml_tensor * dt,
             struct ggml_tensor * A,
             struct ggml_tensor * B,
-            struct ggml_tensor * C);
+            struct ggml_tensor * C,
+            struct ggml_tensor * ids);

     // partition into non-overlapping windows with padding if needed
     // example:
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt

@@ -5,7 +5,7 @@ function(ggml_add_cpu_backend_features cpu_name arch)
     # build, using set_source_files_properties() to set the arch flags is not possible
     set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
     add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
-    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE .)
+    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . ../include)
     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
     set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)

@@ -589,4 +589,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
     if (EMSCRIPTEN)
         set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
     endif()
+
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+        # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
+        target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
+    endif()
 endfunction()
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c

@@ -195,6 +195,7 @@ typedef pthread_t ggml_thread_t;

 static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32] = {
+        .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
         .vec_dot_type = GGML_TYPE_F32,
         .nrows = 1,

@@ -1192,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     }
 }

-static void ggml_compute_forward_mul_mat(
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -1817,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
             {
                 ggml_compute_forward_get_rows_back(params, tensor);
             } break;
+        case GGML_OP_SET_ROWS:
+            {
+                ggml_compute_forward_set_rows(params, tensor);
+            } break;
         case GGML_OP_DIAG:
             {
                 ggml_compute_forward_diag(params, tensor);

@@ -1861,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);

@@ -1944,6 +1953,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor)
             {
                 ggml_compute_forward_unary(params, tensor);
             } break;
+        case GGML_OP_GLU:
+            {
+                ggml_compute_forward_glu(params, tensor);
+            } break;
         case GGML_OP_GET_REL_POS:
             {
                 ggml_compute_forward_get_rel_pos(params, tensor);

@@ -2154,6 +2167,20 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                     GGML_ABORT("fatal error");
             }
             break;
+        case GGML_OP_GLU:
+            switch (ggml_get_glu_op(node)) {
+                case GGML_GLU_OP_REGLU:
+                case GGML_GLU_OP_GEGLU:
+                case GGML_GLU_OP_SWIGLU:
+                case GGML_GLU_OP_GEGLU_ERF:
+                case GGML_GLU_OP_GEGLU_QUICK:
+                    {
+                        n_tasks = n_threads;
+                    } break;
+                default:
+                    GGML_ABORT("fatal error");
+            }
+            break;
         case GGML_OP_SILU_BACK:
         case GGML_OP_MUL:
         case GGML_OP_DIV:

@@ -2170,6 +2197,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = n_threads;
             } break;
         case GGML_OP_GET_ROWS:
+        case GGML_OP_SET_ROWS:
             {
                 // FIXME: get_rows can use additional threads, but the cost of launching additional threads
                 // decreases performance with GPU offloading

@@ -2206,6 +2234,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:

@@ -2724,6 +2753,10 @@ struct ggml_cplan ggml_graph_plan(
                     GGML_ABORT("fatal error");
                 }
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                cur = GGML_IM2COL_WORK_SIZE;
+            } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
                 const int64_t ne00 = node->src[0]->ne[0]; // W

@@ -3124,6 +3157,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads)
     return ggml_graph_compute(cgraph, &cplan);
 }

+void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
+    memcpy(y, x, n * sizeof(float));
+}
+
 void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
     int64_t i = 0;
 #if defined(__F16C__)