@fugood/llama.node 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/llama.cpp/CMakeLists.txt +0 -1
- package/src/llama.cpp/common/arg.cpp +7 -0
- package/src/llama.cpp/common/common.h +1 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -2
- package/src/llama.cpp/ggml/include/ggml.h +91 -10
- package/src/llama.cpp/ggml/src/CMakeLists.txt +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +12 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +726 -155
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +9 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -9
- package/src/llama.cpp/include/llama.h +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +90 -2
- package/src/llama.cpp/src/llama-arch.h +6 -0
- package/src/llama.cpp/src/llama-batch.cpp +27 -1
- package/src/llama.cpp/src/llama-batch.h +8 -1
- package/src/llama.cpp/src/llama-chat.cpp +15 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +64 -50
- package/src/llama.cpp/src/llama-graph.h +41 -16
- package/src/llama.cpp/src/llama-hparams.cpp +2 -1
- package/src/llama.cpp/src/llama-hparams.h +1 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
- package/src/llama.cpp/src/llama-kv-cache-unified.h +62 -24
- package/src/llama.cpp/src/llama-kv-cells.h +62 -10
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
- package/src/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +15 -2
- package/src/llama.cpp/src/llama-memory.cpp +17 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +1234 -248
- package/src/llama.cpp/src/llama-model.h +2 -0
- package/src/llama.cpp/src/llama-vocab.cpp +8 -1
- package/src/llama.cpp/ggml/include/ggml-kompute.h +0 -50
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.2",
+  "version": "1.0.3",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.2",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.2",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.2",
-    "@fugood/node-llama-linux-arm64": "1.0.2",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.2",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.2",
-    "@fugood/node-llama-win32-x64": "1.0.2",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.2",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.2",
-    "@fugood/node-llama-win32-arm64": "1.0.2",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.2",
-    "@fugood/node-llama-darwin-x64": "1.0.2",
-    "@fugood/node-llama-darwin-arm64": "1.0.2"
+    "@fugood/node-llama-linux-x64": "1.0.3",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.3",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.3",
+    "@fugood/node-llama-linux-arm64": "1.0.3",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.3",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.3",
+    "@fugood/node-llama-win32-x64": "1.0.3",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.3",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.3",
+    "@fugood/node-llama-win32-arm64": "1.0.3",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.3",
+    "@fugood/node-llama-darwin-x64": "1.0.3",
+    "@fugood/node-llama-darwin-arm64": "1.0.3"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",

package/src/llama.cpp/CMakeLists.txt
CHANGED
@@ -120,7 +120,6 @@ endfunction()
 
 llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
 llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
-llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
 llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
 llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
 llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)

package/src/llama.cpp/common/arg.cpp
CHANGED
@@ -2734,6 +2734,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.public_path = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
+    add_opt(common_arg(
+        {"--api-prefix"}, "PREFIX",
+        string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
+        [](common_params & params, const std::string & value) {
+            params.api_prefix = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
     add_opt(common_arg(
         {"--no-webui"},
         string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

package/src/llama.cpp/common/common.h
CHANGED
@@ -371,6 +371,7 @@ struct common_params {
 
     std::string hostname = "127.0.0.1";
     std::string public_path = ""; // NOLINT
+    std::string api_prefix = ""; // NOLINT
     std::string chat_template = ""; // NOLINT
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;

package/src/llama.cpp/ggml/CMakeLists.txt
CHANGED
@@ -181,7 +181,6 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou
 option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
 option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
 option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
-option(GGML_KOMPUTE "ggml: use Kompute" OFF)
 option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
 option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
 option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
@@ -266,7 +265,6 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-cann.h
     include/ggml-cpp.h
     include/ggml-cuda.h
-    include/ggml-kompute.h
     include/ggml-opt.h
     include/ggml-metal.h
     include/ggml-rpc.h
@@ -360,6 +358,13 @@ write_basic_package_version_file(
     VERSION ${GGML_INSTALL_VERSION}
     COMPATIBILITY SameMajorVersion)
 
+target_compile_definitions(ggml-base PRIVATE
+    GGML_VERSION="${GGML_INSTALL_VERSION}"
+    GGML_COMMIT="${GGML_BUILD_COMMIT}"
+)
+message(STATUS "ggml version: ${GGML_INSTALL_VERSION}")
+message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}")
+
 install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake
     ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake
     DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml)

package/src/llama.cpp/ggml/include/ggml.h
CHANGED
@@ -314,6 +314,13 @@
 extern "C" {
 #endif
 
+    // Function type used in fatal error callbacks
+    typedef void (*ggml_abort_callback_t)(const char * error_message);
+
+    // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
+    // Returns the old callback for chaining
+    GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
+
     GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
     GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
 
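
The abort-callback hook added above lets an embedder intercept ggml fatal errors before the process terminates. A minimal sketch, assuming only the two declarations in this hunk (the handler name and logging destination are illustrative, not part of the package):

    #include "ggml.h"
    #include <stdio.h>

    // hypothetical handler: record the fatal error before ggml aborts
    static void on_ggml_fatal(const char * error_message) {
        fprintf(stderr, "ggml fatal: %s\n", error_message);
    }

    int main(void) {
        // install the handler; the previous callback is returned for chaining
        ggml_abort_callback_t prev = ggml_set_abort_callback(on_ggml_fatal);
        (void) prev; // keep it if errors should also reach the old handler
        // ... run ggml work ...
        return 0;
    }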
@@ -482,12 +489,13 @@ extern "C" {
         GGML_OP_CONV_TRANSPOSE_1D,
         GGML_OP_IM2COL,
         GGML_OP_IM2COL_BACK,
+        GGML_OP_CONV_2D,
         GGML_OP_CONV_2D_DW,
         GGML_OP_CONV_TRANSPOSE_2D,
         GGML_OP_POOL_1D,
         GGML_OP_POOL_2D,
         GGML_OP_POOL_2D_BACK,
-        GGML_OP_UPSCALE,
+        GGML_OP_UPSCALE,
         GGML_OP_PAD,
         GGML_OP_PAD_REFLECT_1D,
         GGML_OP_ROLL,
@@ -549,6 +557,8 @@ extern "C" {
         GGML_GLU_OP_REGLU,
         GGML_GLU_OP_GEGLU,
         GGML_GLU_OP_SWIGLU,
+        GGML_GLU_OP_GEGLU_ERF,
+        GGML_GLU_OP_GEGLU_QUICK,
 
         GGML_GLU_OP_COUNT,
     };
@@ -638,6 +648,9 @@ extern "C" {
 
     // misc
 
+    GGML_API const char * ggml_version(void);
+    GGML_API const char * ggml_commit(void);
+
     GGML_API void ggml_time_init(void); // call this once at the beginning of the program
     GGML_API int64_t ggml_time_ms(void);
     GGML_API int64_t ggml_time_us(void);
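
Paired with the GGML_VERSION / GGML_COMMIT compile definitions added to ggml/CMakeLists.txt earlier in this diff, these getters expose build metadata at runtime. A minimal sketch:

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        // both strings are baked in at build time via GGML_VERSION / GGML_COMMIT
        printf("ggml version: %s\n", ggml_version());
        printf("ggml commit:  %s\n", ggml_commit());
        return 0;
    }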
@@ -1136,6 +1149,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
+    GGML_API struct ggml_tensor * ggml_geglu_erf(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
     // A: n columns, r rows,
     // B: n columns, r rows,
     GGML_API struct ggml_tensor * ggml_glu_split(
@@ -1159,6 +1188,16 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);
 
+    GGML_API struct ggml_tensor * ggml_geglu_erf_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
+    GGML_API struct ggml_tensor * ggml_geglu_quick_split(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     // normalize along rows
     GGML_API struct ggml_tensor * ggml_norm(
             struct ggml_context * ctx,
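
The GEGLU-erf and GEGLU-quick activations follow the same pattern as the existing REGLU/GEGLU/SWIGLU helpers: the fused form splits the first dimension of its input into two halves, one gating the other, while the _split form takes the two halves as separate tensors. A sketch, assuming ctx is an initialized ggml_context and cur, a, b are pre-existing tensors (the names are illustrative):

    // fused form: ne[0] of cur is split in two, one half gating the other;
    // the *_swapped variant reverses which half plays which role
    struct ggml_tensor * y1 = ggml_geglu_erf(ctx, cur);
    struct ggml_tensor * y2 = ggml_geglu_quick_swapped(ctx, cur);

    // split form: the two halves are supplied as separate tensors of equal shape
    struct ggml_tensor * y3 = ggml_geglu_quick_split(ctx, a, b);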
@@ -1502,8 +1541,14 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
+    // a    [ne0, ne01, ne02, ne03]
+    // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
+    //
+    // broadcast:
+    // ne02 % ne12 == 0
+    // ne03 % ne13 == 0
+    //
     // fused soft_max(a*scale + mask*(ALiBi slope))
-    // mask is optional
     // max_bias = 0.0f for no ALiBi
     GGML_API struct ggml_tensor * ggml_soft_max_ext(
             struct ggml_context * ctx,
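
Only the documentation changes in this hunk: the mask can now broadcast across the third and fourth dimensions of a. For reference, a hedged sketch of a call using the current ggml_soft_max_ext parameter list (scores, mask and scale are assumed pre-existing tensors/values; the parameter list itself is not touched by this diff):

    // mask broadcasts per the rules above: ne02 % ne12 == 0 and ne03 % ne13 == 0
    struct ggml_tensor * probs = ggml_soft_max_ext(ctx, scores, mask, scale, /*max_bias*/ 0.0f);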
@@ -1813,6 +1858,17 @@ extern "C" {
             struct ggml_tensor * b,
             int stride);
 
+    GGML_API struct ggml_tensor * ggml_conv_2d_direct(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,  // convolution kernel [KW, KH, IC, OC]
+            struct ggml_tensor * b,  // input data [W, H, C, N]
+            int s0,  // stride dimension 0
+            int s1,  // stride dimension 1
+            int p0,  // padding dimension 0
+            int p1,  // padding dimension 1
+            int d0,  // dilation dimension 0
+            int d1); // dilation dimension 1
+
     enum ggml_op_pool {
         GGML_OP_POOL_MAX,
         GGML_OP_POOL_AVG,
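
The new GGML_OP_CONV_2D op is exposed through ggml_conv_2d_direct. A sketch, assuming ctx is a ggml_context and kernel / input are tensors laid out as the parameter comments above describe ([KW, KH, IC, OC] and [W, H, C, N]):

    // 3x3 kernel, stride 1, padding 1, dilation 1 -> output keeps the spatial size
    struct ggml_tensor * out = ggml_conv_2d_direct(ctx, kernel, input,
                                                   /*s0*/ 1, /*s1*/ 1,
                                                   /*p0*/ 1, /*p1*/ 1,
                                                   /*d0*/ 1, /*d1*/ 1);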
@@ -1855,6 +1911,12 @@ extern "C" {
     enum ggml_scale_mode {
         GGML_SCALE_MODE_NEAREST = 0,
         GGML_SCALE_MODE_BILINEAR = 1,
+
+        GGML_SCALE_MODE_COUNT
+    };
+
+    enum ggml_scale_flag {
+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
     };
 
     // interpolate
@@ -1867,14 +1929,26 @@ extern "C" {
 
     // interpolate
     // interpolate scale to specified dimensions
-    GGML_API struct ggml_tensor * ggml_upscale_ext(
+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             int ne0,
             int ne1,
             int ne2,
             int ne3,
-            enum ggml_scale_mode mode);
+            enum ggml_scale_mode mode),
+        "use ggml_interpolate instead");
+
+    // Up- or downsamples the input to the specified size.
+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
+    GGML_API struct ggml_tensor * ggml_interpolate(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            int64_t ne0,
+            int64_t ne1,
+            int64_t ne2,
+            int64_t ne3,
+            uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     GGML_API struct ggml_tensor * ggml_pad(
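
ggml_upscale_ext is now deprecated in favor of ggml_interpolate, which takes int64_t target sizes and a mode word that may be OR-ed with a flag from the ggml_scale_flag enum added above. A sketch, assuming ctx and img are an existing context and tensor:

    // upscale the first two dimensions 2x with bilinear filtering and align-corners sampling
    struct ggml_tensor * up = ggml_interpolate(ctx, img,
            img->ne[0] * 2, img->ne[1] * 2, img->ne[2], img->ne[3],
            GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS);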
@@ -1937,11 +2011,17 @@ extern "C" {
 
 #define GGML_KQ_MASK_PAD 64
 
-    // q: [n_embd_k, n_batch, n_head,
-    // k: [n_embd_k, n_kv, n_head_kv,
-    // v: [n_embd_v, n_kv, n_head_kv,
-    // mask: [n_kv, n_batch_pad,
-    // res: [n_embd_v, n_head, n_batch,
+    // q: [n_embd_k, n_batch, n_head, ne3 ]
+    // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
+    // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
+    // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
+    // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
+    //
+    // broadcast:
+    // n_head % n_head_kv == 0
+    // n_head % ne32 == 0
+    // ne3 % ne33 == 0
+    //
     GGML_API struct ggml_tensor * ggml_flash_attn_ext(
             struct ggml_context * ctx,
             struct ggml_tensor * q,
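
Again this hunk only rewrites the comment block: the mask now carries explicit ne32/ne33 dimensions that broadcast against n_head and ne3. For orientation, a hedged sketch of a call; q, k, v, kq_mask and scale are assumed pre-existing, and the trailing scalar parameters reflect the current ggml API rather than anything changed by this diff:

    struct ggml_tensor * attn = ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
                                                    scale,
                                                    /*max_bias*/      0.0f,
                                                    /*logit_softcap*/ 0.0f);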
@@ -1980,7 +2060,8 @@ extern "C" {
             struct ggml_tensor * dt,
             struct ggml_tensor * A,
             struct ggml_tensor * B,
-            struct ggml_tensor * C);
+            struct ggml_tensor * C,
+            struct ggml_tensor * ids);
 
     // partition into non-overlapping windows with padding if needed
     // example:

package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
CHANGED
@@ -5,7 +5,7 @@ function(ggml_add_cpu_backend_features cpu_name arch)
     # build, using set_source_files_properties() to set the arch flags is not possible
     set(GGML_CPU_FEATS_NAME ${cpu_name}-feats)
     add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp)
-    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE .
+    target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . ../include)
     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN})
     target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
     set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
@@ -589,4 +589,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
     if (EMSCRIPTEN)
         set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
     endif()
+
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+        # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math"
+        target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math")
+    endif()
 endfunction()

package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c
CHANGED
@@ -1193,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk(
     }
 }
 
-static void ggml_compute_forward_mul_mat(
+void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
         struct ggml_tensor * dst) {
 
@@ -1866,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_im2col_back_f32(params, tensor);
             } break;
+        case GGML_OP_CONV_2D:
+            {
+                ggml_compute_forward_conv_2d(params, tensor);
+            } break;
         case GGML_OP_CONV_2D_DW:
             {
                 ggml_compute_forward_conv_2d_dw(params, tensor);
@@ -2168,6 +2172,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
                 case GGML_GLU_OP_REGLU:
                 case GGML_GLU_OP_GEGLU:
                 case GGML_GLU_OP_SWIGLU:
+                case GGML_GLU_OP_GEGLU_ERF:
+                case GGML_GLU_OP_GEGLU_QUICK:
                     {
                         n_tasks = n_threads;
                     } break;
@@ -2228,6 +2234,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_IM2COL:
         case GGML_OP_IM2COL_BACK:
+        case GGML_OP_CONV_2D:
         case GGML_OP_CONV_2D_DW:
         case GGML_OP_CONV_TRANSPOSE_1D:
         case GGML_OP_CONV_TRANSPOSE_2D:
@@ -2746,6 +2753,10 @@ struct ggml_cplan ggml_graph_plan(
                         GGML_ABORT("fatal error");
                     }
                 } break;
+            case GGML_OP_CONV_2D:
+                {
+                    cur = GGML_IM2COL_WORK_SIZE;
+                } break;
             case GGML_OP_CONV_TRANSPOSE_2D:
                 {
                     const int64_t ne00 = node->src[0]->ne[0]; // W