@fugood/llama.node 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/package.json +14 -14
  2. package/scripts/llama.cpp.patch +12 -12
  3. package/src/llama.cpp/CMakeLists.txt +0 -1
  4. package/src/llama.cpp/common/arg.cpp +17 -0
  5. package/src/llama.cpp/common/chat.cpp +37 -20
  6. package/src/llama.cpp/common/chat.h +2 -0
  7. package/src/llama.cpp/common/common.h +4 -0
  8. package/src/llama.cpp/ggml/CMakeLists.txt +7 -2
  9. package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
  10. package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -0
  11. package/src/llama.cpp/ggml/include/ggml.h +181 -10
  12. package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -1
  13. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
  14. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +38 -1
  15. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -0
  16. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1297 -211
  17. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +7 -0
  18. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
  19. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
  20. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +103 -9
  21. package/src/llama.cpp/include/llama.h +1 -0
  22. package/src/llama.cpp/src/llama-arch.cpp +108 -2
  23. package/src/llama.cpp/src/llama-arch.h +7 -0
  24. package/src/llama.cpp/src/llama-batch.cpp +27 -1
  25. package/src/llama.cpp/src/llama-batch.h +8 -1
  26. package/src/llama.cpp/src/llama-chat.cpp +15 -0
  27. package/src/llama.cpp/src/llama-chat.h +1 -0
  28. package/src/llama.cpp/src/llama-graph.cpp +95 -81
  29. package/src/llama.cpp/src/llama-graph.h +43 -16
  30. package/src/llama.cpp/src/llama-hparams.cpp +2 -1
  31. package/src/llama.cpp/src/llama-hparams.h +1 -0
  32. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
  33. package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
  34. package/src/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
  35. package/src/llama.cpp/src/llama-kv-cache-unified.h +62 -24
  36. package/src/llama.cpp/src/llama-kv-cells.h +62 -10
  37. package/src/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
  38. package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
  39. package/src/llama.cpp/src/llama-memory-recurrent.cpp +34 -16
  40. package/src/llama.cpp/src/llama-memory.cpp +17 -0
  41. package/src/llama.cpp/src/llama-memory.h +3 -0
  42. package/src/llama.cpp/src/llama-model.cpp +1374 -210
  43. package/src/llama.cpp/src/llama-model.h +3 -0
  44. package/src/llama.cpp/src/llama-vocab.cpp +8 -1
  45. package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
package/src/llama.cpp/ggml/include/ggml.h
@@ -314,6 +314,13 @@
 extern "C" {
 #endif

+    // Function type used in fatal error callbacks
+    typedef void (*ggml_abort_callback_t)(const char * error_message);
+
+    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+    // Returns the old callback for chaining
+    GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
+
     GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
     GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);

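With this hook, fatal ggml errors can be routed to the host application before the process aborts. A minimal sketch of a chained handler, assuming only the two declarations above; the stderr target and the on_ggml_abort/g_prev_abort_cb names are illustrative, not this package's code:

    #include <stdio.h>
    #include "ggml.h"

    static ggml_abort_callback_t g_prev_abort_cb = NULL;

    static void on_ggml_abort(const char * error_message) {
        // forward the message to the application's log sink
        fprintf(stderr, "[fatal] ggml: %s\n", error_message);
        // chain to whatever callback was installed before ours
        if (g_prev_abort_cb) {
            g_prev_abort_cb(error_message);
        }
    }

    // at startup:
    //   g_prev_abort_cb = ggml_set_abort_callback(on_ggml_abort);
    // ggml_set_abort_callback(NULL) restores the default (print to stdout).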
@@ -470,6 +477,7 @@ extern "C" {
         GGML_OP_TRANSPOSE,
         GGML_OP_GET_ROWS,
         GGML_OP_GET_ROWS_BACK,
+        GGML_OP_SET_ROWS,
         GGML_OP_DIAG,
         GGML_OP_DIAG_MASK_INF,
         GGML_OP_DIAG_MASK_ZERO,
@@ -481,12 +489,13 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
         GGML_OP_POOL_2D_BACK,
-        GGML_OP_UPSCALE, // nearest interpolate
+        GGML_OP_UPSCALE,
         GGML_OP_PAD,
         GGML_OP_PAD_REFLECT_1D,
         GGML_OP_ROLL,
@@ -519,6 +528,8 @@ extern "C" {
         GGML_OP_CROSS_ENTROPY_LOSS_BACK,
         GGML_OP_OPT_STEP_ADAMW,

+        GGML_OP_GLU,
+
         GGML_OP_COUNT,
     };

@@ -542,6 +553,16 @@ extern "C" {
         GGML_UNARY_OP_COUNT,
     };

+    enum ggml_glu_op {
+        GGML_GLU_OP_REGLU,
+        GGML_GLU_OP_GEGLU,
+        GGML_GLU_OP_SWIGLU,
+        GGML_GLU_OP_GEGLU_ERF,
+        GGML_GLU_OP_GEGLU_QUICK,
+
+        GGML_GLU_OP_COUNT,
+    };
+
     enum ggml_object_type {
         GGML_OBJECT_TYPE_TENSOR,
         GGML_OBJECT_TYPE_GRAPH,
@@ -627,6 +648,9 @@ extern "C" {

     // misc

+    GGML_API const char * ggml_version(void);
+    GGML_API const char * ggml_commit(void);
+
     GGML_API void ggml_time_init(void); // call this once at the beginning of the program
     GGML_API int64_t ggml_time_ms(void);
     GGML_API int64_t ggml_time_us(void);
@@ -657,6 +681,7 @@ extern "C" {
     GGML_API const char * ggml_op_symbol(enum ggml_op op);

     GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+    GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
     GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name

     GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
@@ -687,6 +712,9 @@ extern "C" {
     // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
     GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);

+    // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
+    GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
+
     GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
     GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);

@@ -758,6 +786,7 @@ extern "C" {
     GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);

     GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+    GGML_API enum ggml_glu_op   ggml_get_glu_op(const struct ggml_tensor * tensor);

     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
@@ -1086,6 +1115,89 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);

+    // gated linear unit ops
+    // A: n columns, r rows,
+    // result is n / 2 columns, r rows,
+    // expects gate in second half of row, unless swapped is true
+    GGML_API struct ggml_tensor * ggml_glu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            enum ggml_glu_op op,
+            bool swapped);
+
+    GGML_API struct ggml_tensor * ggml_reglu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_reglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_swiglu(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_swiglu_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    // A: n columns, r rows,
+    // B: n columns, r rows,
+    GGML_API struct ggml_tensor * ggml_glu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            enum ggml_glu_op op);
+
+    GGML_API struct ggml_tensor * ggml_reglu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_swiglu_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
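These declarations fuse gated-linear-unit activations that previously had to be composed from separate mul/activation nodes. A minimal sketch of a SwiGLU feed-forward step using the split form, assuming an initialized context and weight tensors; the names and the argument order (activation applied to the first tensor, following upstream llama.cpp usage) are assumptions, not this package's code:

    #include "ggml.h"

    // silu(gate) * up in one fused op instead of three graph nodes
    static struct ggml_tensor * ffn_swiglu(
            struct ggml_context * ctx,
            struct ggml_tensor  * w_gate, // [n_embd, n_ff]
            struct ggml_tensor  * w_up,   // [n_embd, n_ff]
            struct ggml_tensor  * cur) {  // [n_embd, n_tokens]
        struct ggml_tensor * gate = ggml_mul_mat(ctx, w_gate, cur); // [n_ff, n_tokens]
        struct ggml_tensor * up   = ggml_mul_mat(ctx, w_up,   cur); // [n_ff, n_tokens]
        return ggml_swiglu_split(ctx, gate, up);                    // [n_ff, n_tokens]
    }

The non-split ggml_swiglu(ctx, a) variant instead takes a single tensor whose rows pack both halves, with the gate in the second half unless the _swapped form is used.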
@@ -1375,6 +1487,23 @@ extern "C" {
             struct ggml_tensor * b,  // row indices
             struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape

+    // a TD  [n_embd, ne1,    ne2,  ne3]
+    // b TS  [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
+    // c I64 [n_rows, ne11,   ne12, 1]    | c[i] in [0, ne1)
+    //
+    // undefined behavior if destination rows overlap
+    //
+    // broadcast:
+    //   ne2 % ne11 == 0
+    //   ne3 % ne12 == 0
+    //
+    // return view(a)
+    GGML_API struct ggml_tensor * ggml_set_rows(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,  // destination
+            struct ggml_tensor * b,  // source
+            struct ggml_tensor * c); // row indices
+
     GGML_API struct ggml_tensor * ggml_diag(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
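ggml_set_rows is the scatter counterpart of ggml_get_rows, and the CPU backend changes below wire it into dispatch and scheduling (the KV-cache rework elsewhere in this diff is a likely consumer). A minimal sketch under the shape contract above; sizes and names are illustrative:

    #include "ggml.h"

    static struct ggml_tensor * scatter_rows(struct ggml_context * ctx) {
        struct ggml_tensor * dst = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 64); // [n_embd, ne1]
        struct ggml_tensor * src = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8,  4); // [n_embd, n_rows]
        struct ggml_tensor * idx = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 4);     // values in [0, 64)

        // row idx[i] of dst receives row i of src; the result is a view of dst
        return ggml_set_rows(ctx, dst, src, idx);
    }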
@@ -1412,8 +1541,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);

+    // a    [ne0, ne01, ne02, ne03]
+    // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
+    //
+    // broadcast:
+    //   ne02 % ne12 == 0
+    //   ne03 % ne13 == 0
+    //
     // fused soft_max(a*scale + mask*(ALiBi slope))
-    // mask is optional
     // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
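For orientation, a sketch of the fused call as typically applied to attention scores; the tensor names are assumptions, and the full argument list (ctx, a, mask, scale, max_bias) follows the upstream ggml_soft_max_ext signature, which this hunk only partially shows:

    #include <math.h>
    #include "ggml.h"

    static struct ggml_tensor * attn_probs(
            struct ggml_context * ctx,
            struct ggml_tensor  * kq,      // [n_kv, n_tokens, n_head, 1]
            struct ggml_tensor  * kq_mask, // may broadcast over dims 2/3 per the rules above
            int                   n_embd_head) {
        return ggml_soft_max_ext(ctx, kq, kq_mask,
                1.0f / sqrtf((float) n_embd_head), // scale applied to kq
                0.0f);                             // max_bias = 0.0f -> no ALiBi
    }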
@@ -1723,6 +1858,17 @@ extern "C" {
             struct ggml_tensor * b,
             int stride);

+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,  // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor * b,  // input data [W, H, C, N]
+            int s0,  // stride dimension 0
+            int s1,  // stride dimension 1
+            int p0,  // padding dimension 0
+            int p1,  // padding dimension 1
+            int d0,  // dilation dimension 0
+            int d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
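A hedged sketch of the new direct convolution, which avoids materializing an im2col buffer for the whole input (the CPU planner below caps its workspace at GGML_IM2COL_WORK_SIZE). The helper name and sizes are illustrative:

    #include "ggml.h"

    // 3x3 convolution, stride 1, padding 1 ("same" output size)
    static struct ggml_tensor * conv3x3(
            struct ggml_context * ctx,
            struct ggml_tensor  * kernel, // [3, 3, IC, OC]
            struct ggml_tensor  * input)  // [W, H, IC, N]
    {
        return ggml_conv_2d_direct(ctx, kernel, input,
                /*s0*/ 1, /*s1*/ 1,  // stride
                /*p0*/ 1, /*p1*/ 1,  // padding
                /*d0*/ 1, /*d1*/ 1); // dilation -> output [W, H, OC, N]
    }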
@@ -1765,6 +1911,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST  = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };

     // interpolate
@@ -1777,14 +1929,26 @@ extern "C" {

     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int ne0,
             int ne1,
             int ne2,
             int ne3,
-            enum ggml_scale_mode mode);
+            enum ggml_scale_mode mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3,
+            uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]

     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
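Migration is mechanical: the deprecated call maps onto ggml_interpolate, with the mode optionally OR-ed with flag bits from the new enum. A small sketch using only symbols declared above; the helper name is illustrative:

    #include "ggml.h"

    static struct ggml_tensor * resize_bilinear(
            struct ggml_context * ctx,
            struct ggml_tensor  * img, // [W, H, C, N]
            int64_t               w,
            int64_t               h) {
        // mode is a ggml_scale_mode, optionally OR-ed with ggml_scale_flag bits
        return ggml_interpolate(ctx, img, w, h, img->ne[2], img->ne[3],
                GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);
    }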
@@ -1847,11 +2011,17 @@ extern "C" {

     #define GGML_KQ_MASK_PAD 64

-    // q:    [n_embd_k, n_batch,     n_head,    1]
-    // k:    [n_embd_k, n_kv,        n_head_kv, 1]
-    // v:    [n_embd_v, n_kv,        n_head_kv, 1] !! not transposed !!
-    // mask: [n_kv,     n_batch_pad, 1,         1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
-    // res:  [n_embd_v, n_head,      n_batch,   1] !! permuted !!
+    // q:    [n_embd_k, n_batch,     n_head,    ne3 ]
+    // k:    [n_embd_k, n_kv,        n_head_kv, ne3 ]
+    // v:    [n_embd_v, n_kv,        n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv,     n_batch_pad, ne32,      ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
+    // res:  [n_embd_v, n_head,      n_batch,   ne3 ] !! permuted !!
+    //
+    // broadcast:
+    //   n_head % n_head_kv == 0
+    //   n_head % ne32      == 0
+    //   ne3    % ne33      == 0
+    //
     GGML_API struct ggml_tensor * ggml_flash_attn_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * q,
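The relaxed shapes let grouped-query attention and per-sequence masks pass through a single call. A sketch of the GQA case, with head counts chosen for illustration; the trailing scale/max_bias/logit_softcap parameters follow the upstream signature, which this hunk does not show, so treat them as an assumption:

    #include <math.h>
    #include "ggml.h"

    // with n_head = 32 query heads over n_head_kv = 8 KV heads, 32 % 8 == 0,
    // so each KV head serves 4 query heads; the mask broadcasts over dims 2/3
    static struct ggml_tensor * gqa_attn(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,       // [128, n_batch, 32, 1]
            struct ggml_tensor  * k,       // [128, n_kv,     8, 1]
            struct ggml_tensor  * v,       // [128, n_kv,     8, 1]
            struct ggml_tensor  * kq_mask) // [n_kv, GGML_PAD(n_batch, GGML_KQ_MASK_PAD), 1, 1]
    {
        return ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
                1.0f / sqrtf(128.0f), // scale = 1/sqrt(n_embd_k)
                0.0f,                 // max_bias: no ALiBi
                0.0f);                // logit_softcap disabled
    }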
@@ -1890,7 +2060,8 @@ extern "C" {
             struct ggml_tensor * dt,
             struct ggml_tensor * A,
             struct ggml_tensor * B,
-            struct ggml_tensor * C);
+            struct ggml_tensor * C,
+            struct ggml_tensor * ids);

     // partition into non-overlapping windows with padding if needed
     // example:
package/src/llama.cpp/ggml/src/CMakeLists.txt
@@ -365,7 +365,6 @@ ggml_add_backend(BLAS)
 ggml_add_backend(CANN)
 ggml_add_backend(CUDA)
 ggml_add_backend(HIP)
-ggml_add_backend(Kompute)
 ggml_add_backend(METAL)
 ggml_add_backend(MUSA)
 ggml_add_backend(RPC)
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
@@ -5,7 +5,7 @@ function(ggml_add_cpu_backend_features cpu_name arch)
     # build, using set_source_files_properties() to set the arch flags is not possible
     set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
     add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
-    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
+    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . ../include)
     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
     set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -589,4 +589,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
     if (EMSCRIPTEN)
         set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
     endif()
+
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+        # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
+        target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
+    endif()
 endfunction()
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c
@@ -195,6 +195,7 @@ typedef pthread_t ggml_thread_t;

 static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32] = {
+        .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32,
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
         .vec_dot_type = GGML_TYPE_F32,
         .nrows = 1,
@@ -1192,7 +1193,7 @@
     }
 }

-static void ggml_compute_forward_mul_mat(
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {

@@ -1817,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_get_rows_back(params, tensor);
             } break;
+        case GGML_OP_SET_ROWS:
+            {
+                ggml_compute_forward_set_rows(params, tensor);
+            } break;
         case GGML_OP_DIAG:
             {
                 ggml_compute_forward_diag(params, tensor);
@@ -1861,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -1944,6 +1953,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
             {
                 ggml_compute_forward_unary(params, tensor);
             } break;
+        case GGML_OP_GLU:
+            {
+                ggml_compute_forward_glu(params, tensor);
+            } break;
         case GGML_OP_GET_REL_POS:
             {
                 ggml_compute_forward_get_rel_pos(params, tensor);
@@ -2154,6 +2167,20 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                     GGML_ABORT("fatal error");
             }
             break;
+        case GGML_OP_GLU:
+            switch (ggml_get_glu_op(node)) {
+                case GGML_GLU_OP_REGLU:
+                case GGML_GLU_OP_GEGLU:
+                case GGML_GLU_OP_SWIGLU:
+                case GGML_GLU_OP_GEGLU_ERF:
+                case GGML_GLU_OP_GEGLU_QUICK:
+                    {
+                        n_tasks = n_threads;
+                    } break;
+                default:
+                    GGML_ABORT("fatal error");
+            }
+            break;
         case GGML_OP_SILU_BACK:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
@@ -2170,6 +2197,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 n_tasks = n_threads;
             } break;
         case GGML_OP_GET_ROWS:
+        case GGML_OP_SET_ROWS:
             {
                 // FIXME: get_rows can use additional threads, but the cost of launching additional threads
                 // decreases performance with GPU offloading
@@ -2206,6 +2234,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2724,6 +2753,10 @@ struct ggml_cplan ggml_graph_plan(
                         GGML_ABORT("fatal error");
                 }
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                cur = GGML_IM2COL_WORK_SIZE;
+            } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
                 const int64_t ne00 = node->src[0]->ne[0]; // W
@@ -3124,6 +3157,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     return ggml_graph_compute(cgraph, &cplan);
 }

+void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) {
+    memcpy(y, x, n * sizeof(float));
+}
+
 void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
     int64_t i = 0;
 #if defined(__F16C__)
package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp
@@ -416,6 +416,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {

     switch (op->op) {
         case GGML_OP_CPY:
+        case GGML_OP_SET_ROWS:
             return
                 op->type != GGML_TYPE_IQ3_XXS &&
                 op->type != GGML_TYPE_IQ3_S   &&