@fugood/llama.node 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +4 -3
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +6 -6
- package/src/llama.cpp/CMakeLists.txt +4 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -37
- package/src/llama.cpp/common/arg.cpp +7 -0
- package/src/llama.cpp/common/common.cpp +1 -5
- package/src/llama.cpp/common/common.h +2 -1
- package/src/llama.cpp/common/download.cpp +47 -29
- package/src/llama.cpp/common/log.cpp +6 -0
- package/src/llama.cpp/common/log.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +71 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +34 -11
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +50 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +283 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +235 -34
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -317
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +4 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +95 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +10 -0
- package/src/llama.cpp/src/CMakeLists.txt +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +32 -0
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +2 -1
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +4 -3
- package/src/llama.cpp/src/llama-model.cpp +102 -0
- package/src/llama.cpp/src/llama-model.h +2 -0
- package/src/llama.cpp/src/llama-sampling.cpp +10 -5
- package/src/llama.cpp/src/llama-vocab.cpp +16 -1
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/afmoe.cpp +187 -0
- package/src/llama.cpp/src/models/ernie4-5.cpp +4 -5
- package/src/llama.cpp/src/models/models.h +4 -0
- package/src/llama.cpp/src/models/openai-moe-iswa.cpp +2 -1
- package/src/llama.cpp/src/unicode.cpp +77 -0
package/CMakeLists.txt
CHANGED
|
@@ -121,6 +121,8 @@ endif()
|
|
|
121
121
|
|
|
122
122
|
set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
|
|
123
123
|
|
|
124
|
+
set(LLAMA_BUILD_TOOLS ON CACHE BOOL "Build tools")
|
|
125
|
+
|
|
124
126
|
set(LLAMA_CURL OFF CACHE BOOL "Build curl")
|
|
125
127
|
|
|
126
128
|
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
|
|
@@ -128,7 +130,6 @@ set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
|
|
|
128
130
|
add_definitions(-DGGML_MAX_NAME=80)
|
|
129
131
|
|
|
130
132
|
add_subdirectory("src/llama.cpp")
|
|
131
|
-
add_subdirectory("src/llama.cpp/tools/mtmd")
|
|
132
133
|
|
|
133
134
|
include_directories(
|
|
134
135
|
${CMAKE_JS_INC}
|
|
@@ -172,7 +173,7 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
|
|
|
172
173
|
|
|
173
174
|
add_library(win_dynamic_load ${WIN_DYNAMIC_LOAD_SRC})
|
|
174
175
|
set_target_properties(win_dynamic_load PROPERTIES COMPILE_FLAGS "-Wno-implicit-function-declaration")
|
|
175
|
-
|
|
176
|
+
|
|
176
177
|
unset(CMAKE_JS_SRC)
|
|
177
178
|
unset(CMAKE_JS_LIB)
|
|
178
179
|
unset(CMAKE_JS_NODELIB_DEF)
|
|
@@ -207,7 +208,7 @@ if(CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
|
|
|
207
208
|
endif()
|
|
208
209
|
|
|
209
210
|
if (GGML_METAL AND NOT GGML_METAL_EMBED_LIBRARY)
|
|
210
|
-
# copy ${CMAKE_BINARY_DIR}/bin/default.metallib
|
|
211
|
+
# copy ${CMAKE_BINARY_DIR}/bin/default.metallib
|
|
211
212
|
add_custom_command(
|
|
212
213
|
TARGET copy_assets
|
|
213
214
|
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/default.metallib ${METAL_LIB_TARGET_PATH}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.3.1",
|
|
4
|
+
"version": "1.3.3",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,19 +72,19 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-linux-x64": "1.3.1",
|
|
76
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.3.1",
|
|
77
|
-
"@fugood/node-llama-linux-x64-cuda": "1.3.1",
|
|
78
|
-
"@fugood/node-llama-linux-arm64": "1.3.1",
|
|
79
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.3.1",
|
|
80
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.3.1",
|
|
81
|
-
"@fugood/node-llama-win32-x64": "1.3.1",
|
|
82
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.3.1",
|
|
83
|
-
"@fugood/node-llama-win32-x64-cuda": "1.3.1",
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.3.1",
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.3.1",
|
|
86
|
-
"@fugood/node-llama-darwin-x64": "1.3.1",
|
|
87
|
-
"@fugood/node-llama-darwin-arm64": "1.3.1"
|
|
75
|
+
"@fugood/node-llama-linux-x64": "1.3.3",
|
|
76
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.3.3",
|
|
77
|
+
"@fugood/node-llama-linux-x64-cuda": "1.3.3",
|
|
78
|
+
"@fugood/node-llama-linux-arm64": "1.3.3",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.3.3",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.3.3",
|
|
81
|
+
"@fugood/node-llama-win32-x64": "1.3.3",
|
|
82
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.3.3",
|
|
83
|
+
"@fugood/node-llama-win32-x64-cuda": "1.3.3",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.3.3",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.3.3",
|
|
86
|
+
"@fugood/node-llama-darwin-x64": "1.3.3",
|
|
87
|
+
"@fugood/node-llama-darwin-arm64": "1.3.3"
|
|
88
88
|
},
|
|
89
89
|
"devDependencies": {
|
|
90
90
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
|
|
2
|
-
index
|
|
2
|
+
index 706fa32ee..248459903 100644
|
|
3
3
|
--- a/src/llama.cpp/common/CMakeLists.txt
|
|
4
4
|
+++ b/src/llama.cpp/common/CMakeLists.txt
|
|
5
|
-
@@ -
|
|
5
|
+
@@ -141,9 +141,16 @@ if (LLAMA_LLGUIDANCE)
|
|
6
6
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
|
|
7
7
|
endif ()
|
|
8
8
|
|
|
@@ -85,10 +85,10 @@ index 50efb0d4e..f471a84c7 100644
|
|
|
85
85
|
struct common_chat_tool_call {
|
|
86
86
|
std::string name;
|
|
87
87
|
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
|
|
88
|
-
index b0591e84b..93759f884 100644
|
|
88
|
+
index 4dc95dcba..ea0ea86c0 100644
|
|
89
89
|
--- a/src/llama.cpp/common/common.cpp
|
|
90
90
|
+++ b/src/llama.cpp/common/common.cpp
|
|
91
|
-
@@ -
|
|
91
|
+
@@ -1155,6 +1155,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
92
92
|
mparams.n_gpu_layers = params.n_gpu_layers;
|
|
93
93
|
}
|
|
94
94
|
|
|
@@ -97,7 +97,7 @@ index b0591e84b..93759f884 100644
|
|
|
97
97
|
mparams.split_mode = params.split_mode;
|
|
98
98
|
mparams.tensor_split = params.tensor_split;
|
|
99
99
|
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
|
|
100
|
-
index a8cb630ea..0919ec5d3 100644
|
|
100
|
+
index f42c083fa..c573cc812 100644
|
|
101
101
|
--- a/src/llama.cpp/common/common.h
|
|
102
102
|
+++ b/src/llama.cpp/common/common.h
|
|
103
103
|
@@ -274,6 +274,7 @@ struct lr_opt {
|
|
@@ -109,7 +109,7 @@ index a8cb630ea..0919ec5d3 100644
|
|
|
109
109
|
int32_t n_ctx = 4096; // context size
|
|
110
110
|
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
|
111
111
|
diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
|
|
112
|
-
index
|
|
112
|
+
index e52e050a8..c1000c162 100644
|
|
113
113
|
--- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
|
|
114
114
|
+++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
|
|
115
115
|
@@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
@@ -92,6 +92,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_
|
|
|
92
92
|
|
|
93
93
|
# 3rd party libs
|
|
94
94
|
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
|
|
95
|
+
option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
|
|
95
96
|
option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
|
|
96
97
|
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
|
|
97
98
|
|
|
@@ -200,6 +201,9 @@ endif()
|
|
|
200
201
|
|
|
201
202
|
if (LLAMA_BUILD_COMMON)
|
|
202
203
|
add_subdirectory(common)
|
|
204
|
+
if (LLAMA_HTTPLIB)
|
|
205
|
+
add_subdirectory(vendor/cpp-httplib)
|
|
206
|
+
endif()
|
|
203
207
|
endif()
|
|
204
208
|
|
|
205
209
|
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
|
|
@@ -79,10 +79,11 @@ if (BUILD_SHARED_LIBS)
|
|
|
79
79
|
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
80
80
|
endif()
|
|
81
81
|
|
|
82
|
+
# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
|
|
82
83
|
set(LLAMA_COMMON_EXTRA_LIBS build_info)
|
|
83
84
|
|
|
84
|
-
# Use curl to download model url
|
|
85
85
|
if (LLAMA_CURL)
|
|
86
|
+
# Use curl to download model url
|
|
86
87
|
find_package(CURL)
|
|
87
88
|
if (NOT CURL_FOUND)
|
|
88
89
|
message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
|
|
@@ -90,42 +91,10 @@ if (LLAMA_CURL)
|
|
|
90
91
|
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
|
91
92
|
include_directories(${CURL_INCLUDE_DIRS})
|
|
92
93
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if (OpenSSL_FOUND)
|
|
98
|
-
include(CheckCSourceCompiles)
|
|
99
|
-
set(SAVED_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
|
|
100
|
-
set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
|
|
101
|
-
check_c_source_compiles("
|
|
102
|
-
#include <openssl/opensslv.h>
|
|
103
|
-
#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
|
|
104
|
-
# if OPENSSL_VERSION_NUMBER < 0x1010107f
|
|
105
|
-
# error bad version
|
|
106
|
-
# endif
|
|
107
|
-
#else
|
|
108
|
-
# if OPENSSL_VERSION_NUMBER < 0x30000000L
|
|
109
|
-
# error bad version
|
|
110
|
-
# endif
|
|
111
|
-
#endif
|
|
112
|
-
int main() { return 0; }
|
|
113
|
-
" OPENSSL_VERSION_SUPPORTED)
|
|
114
|
-
set(CMAKE_REQUIRED_INCLUDES ${SAVED_CMAKE_REQUIRED_INCLUDES})
|
|
115
|
-
if (OPENSSL_VERSION_SUPPORTED)
|
|
116
|
-
message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
|
|
117
|
-
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
|
|
118
|
-
target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
|
|
119
|
-
if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
|
|
120
|
-
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
|
|
121
|
-
find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
|
|
122
|
-
find_library(SECURITY_FRAMEWORK Security REQUIRED)
|
|
123
|
-
target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
|
|
124
|
-
endif()
|
|
125
|
-
endif()
|
|
126
|
-
else()
|
|
127
|
-
message(STATUS "OpenSSL not found, SSL support disabled")
|
|
128
|
-
endif()
|
|
94
|
+
elseif (LLAMA_HTTPLIB)
|
|
95
|
+
# otherwise, use cpp-httplib
|
|
96
|
+
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
|
|
97
|
+
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
|
|
129
98
|
endif()
|
|
130
99
|
|
|
131
100
|
if (LLAMA_LLGUIDANCE)
|
|
@@ -2253,6 +2253,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
2253
2253
|
params.is_pp_shared = true;
|
|
2254
2254
|
}
|
|
2255
2255
|
).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
|
|
2256
|
+
add_opt(common_arg(
|
|
2257
|
+
{"-tgs"},
|
|
2258
|
+
string_format("is the text generation separated across the different sequences (default: %s)", params.is_tg_separate ? "true" : "false"),
|
|
2259
|
+
[](common_params & params) {
|
|
2260
|
+
params.is_tg_separate = true;
|
|
2261
|
+
}
|
|
2262
|
+
).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
|
|
2256
2263
|
add_opt(common_arg(
|
|
2257
2264
|
{"-npp"}, "n0,n1,...",
|
|
2258
2265
|
"number of prompt tokens",
|
|
@@ -355,11 +355,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
|
|
|
355
355
|
}
|
|
356
356
|
|
|
357
357
|
void common_init() {
|
|
358
|
-
llama_log_set(
|
|
359
|
-
if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
|
|
360
|
-
common_log_add(common_log_main(), level, "%s", text);
|
|
361
|
-
}
|
|
362
|
-
}, NULL);
|
|
358
|
+
llama_log_set(common_log_default_callback, NULL);
|
|
363
359
|
|
|
364
360
|
#ifdef NDEBUG
|
|
365
361
|
const char * build_type = "";
|
|
@@ -461,7 +461,8 @@ struct common_params {
|
|
|
461
461
|
float slot_prompt_similarity = 0.1f;
|
|
462
462
|
|
|
463
463
|
// batched-bench params
|
|
464
|
-
bool is_pp_shared = false;
|
|
464
|
+
bool is_pp_shared = false;
|
|
465
|
+
bool is_tg_separate = false;
|
|
465
466
|
|
|
466
467
|
std::vector<int32_t> n_pp;
|
|
467
468
|
std::vector<int32_t> n_tg;
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
#if defined(LLAMA_USE_CURL)
|
|
21
21
|
#include <curl/curl.h>
|
|
22
22
|
#include <curl/easy.h>
|
|
23
|
-
#else
|
|
23
|
+
#elif defined(LLAMA_USE_HTTPLIB)
|
|
24
24
|
#include "http.h"
|
|
25
25
|
#endif
|
|
26
26
|
|
|
@@ -467,7 +467,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
|
|
467
467
|
return { res_code, std::move(res_buffer) };
|
|
468
468
|
}
|
|
469
469
|
|
|
470
|
-
#else
|
|
470
|
+
#elif defined(LLAMA_USE_HTTPLIB)
|
|
471
471
|
|
|
472
472
|
static bool is_output_a_tty() {
|
|
473
473
|
#if defined(_WIN32)
|
|
@@ -713,6 +713,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|
|
713
713
|
|
|
714
714
|
#endif // LLAMA_USE_CURL
|
|
715
715
|
|
|
716
|
+
#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
|
|
717
|
+
|
|
716
718
|
static bool common_download_file_single(const std::string & url,
|
|
717
719
|
const std::string & path,
|
|
718
720
|
const std::string & bearer_token,
|
|
@@ -907,33 +909,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
|
|
|
907
909
|
return { hf_repo, ggufFile, mmprojFile };
|
|
908
910
|
}
|
|
909
911
|
|
|
910
|
-
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
911
|
-
std::vector<common_cached_model_info> models;
|
|
912
|
-
const std::string cache_dir = fs_get_cache_directory();
|
|
913
|
-
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
|
914
|
-
for (const auto & file : files) {
|
|
915
|
-
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
|
916
|
-
common_cached_model_info model_info;
|
|
917
|
-
model_info.manifest_path = file.path;
|
|
918
|
-
std::string fname = file.name;
|
|
919
|
-
string_replace_all(fname, ".json", ""); // remove extension
|
|
920
|
-
auto parts = string_split<std::string>(fname, '=');
|
|
921
|
-
if (parts.size() == 4) {
|
|
922
|
-
// expect format: manifest=<user>=<model>=<tag>=<other>
|
|
923
|
-
model_info.user = parts[1];
|
|
924
|
-
model_info.model = parts[2];
|
|
925
|
-
model_info.tag = parts[3];
|
|
926
|
-
} else {
|
|
927
|
-
// invalid format
|
|
928
|
-
continue;
|
|
929
|
-
}
|
|
930
|
-
model_info.size = 0; // TODO: get GGUF size, not manifest size
|
|
931
|
-
models.push_back(model_info);
|
|
932
|
-
}
|
|
933
|
-
}
|
|
934
|
-
return models;
|
|
935
|
-
}
|
|
936
|
-
|
|
937
912
|
//
|
|
938
913
|
// Docker registry functions
|
|
939
914
|
//
|
|
@@ -1052,3 +1027,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
|
|
|
1052
1027
|
throw;
|
|
1053
1028
|
}
|
|
1054
1029
|
}
|
|
1030
|
+
|
|
1031
|
+
#else
|
|
1032
|
+
|
|
1033
|
+
common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
|
|
1034
|
+
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
bool common_download_model(const common_params_model &, const std::string &, bool) {
|
|
1038
|
+
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1039
|
+
}
|
|
1040
|
+
|
|
1041
|
+
std::string common_docker_resolve_model(const std::string &) {
|
|
1042
|
+
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1043
|
+
}
|
|
1044
|
+
|
|
1045
|
+
#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
|
|
1046
|
+
|
|
1047
|
+
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
1048
|
+
std::vector<common_cached_model_info> models;
|
|
1049
|
+
const std::string cache_dir = fs_get_cache_directory();
|
|
1050
|
+
const std::vector<common_file_info> files = fs_list_files(cache_dir);
|
|
1051
|
+
for (const auto & file : files) {
|
|
1052
|
+
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
|
1053
|
+
common_cached_model_info model_info;
|
|
1054
|
+
model_info.manifest_path = file.path;
|
|
1055
|
+
std::string fname = file.name;
|
|
1056
|
+
string_replace_all(fname, ".json", ""); // remove extension
|
|
1057
|
+
auto parts = string_split<std::string>(fname, '=');
|
|
1058
|
+
if (parts.size() == 4) {
|
|
1059
|
+
// expect format: manifest=<user>=<model>=<tag>=<other>
|
|
1060
|
+
model_info.user = parts[1];
|
|
1061
|
+
model_info.model = parts[2];
|
|
1062
|
+
model_info.tag = parts[3];
|
|
1063
|
+
} else {
|
|
1064
|
+
// invalid format
|
|
1065
|
+
continue;
|
|
1066
|
+
}
|
|
1067
|
+
model_info.size = 0; // TODO: get GGUF size, not manifest size
|
|
1068
|
+
models.push_back(model_info);
|
|
1069
|
+
}
|
|
1070
|
+
}
|
|
1071
|
+
return models;
|
|
1072
|
+
}
|
|
@@ -442,3 +442,9 @@ void common_log_set_prefix(struct common_log * log, bool prefix) {
|
|
|
442
442
|
void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
|
443
443
|
log->set_timestamps(timestamps);
|
|
444
444
|
}
|
|
445
|
+
|
|
446
|
+
void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
|
|
447
|
+
if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
|
|
448
|
+
common_log_add(common_log_main(), level, "%s", text);
|
|
449
|
+
}
|
|
450
|
+
}
|
|
@@ -36,6 +36,8 @@ extern int common_log_verbosity_thold;
|
|
|
36
36
|
|
|
37
37
|
void common_log_set_verbosity_thold(int verbosity); // not thread-safe
|
|
38
38
|
|
|
39
|
+
void common_log_default_callback(enum ggml_log_level level, const char * text, void * user_data);
|
|
40
|
+
|
|
39
41
|
// the common_log uses an internal worker thread to print/write log messages
|
|
40
42
|
// when the worker thread is paused, incoming log messages are discarded
|
|
41
43
|
struct common_log;
|
|
@@ -475,6 +475,7 @@ extern "C" {
|
|
|
475
475
|
GGML_OP_COS,
|
|
476
476
|
GGML_OP_SUM,
|
|
477
477
|
GGML_OP_SUM_ROWS,
|
|
478
|
+
GGML_OP_CUMSUM,
|
|
478
479
|
GGML_OP_MEAN,
|
|
479
480
|
GGML_OP_ARGMAX,
|
|
480
481
|
GGML_OP_COUNT_EQUAL,
|
|
@@ -530,6 +531,8 @@ extern "C" {
|
|
|
530
531
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
531
532
|
GGML_OP_ARGSORT,
|
|
532
533
|
GGML_OP_LEAKY_RELU,
|
|
534
|
+
GGML_OP_TRI,
|
|
535
|
+
GGML_OP_FILL,
|
|
533
536
|
|
|
534
537
|
GGML_OP_FLASH_ATTN_EXT,
|
|
535
538
|
GGML_OP_FLASH_ATTN_BACK,
|
|
@@ -542,6 +545,7 @@ extern "C" {
|
|
|
542
545
|
GGML_OP_RWKV_WKV6,
|
|
543
546
|
GGML_OP_GATED_LINEAR_ATTN,
|
|
544
547
|
GGML_OP_RWKV_WKV7,
|
|
548
|
+
GGML_OP_SOLVE_TRI,
|
|
545
549
|
|
|
546
550
|
GGML_OP_UNARY,
|
|
547
551
|
|
|
@@ -576,6 +580,8 @@ extern "C" {
|
|
|
576
580
|
GGML_UNARY_OP_HARDSWISH,
|
|
577
581
|
GGML_UNARY_OP_HARDSIGMOID,
|
|
578
582
|
GGML_UNARY_OP_EXP,
|
|
583
|
+
GGML_UNARY_OP_EXPM1,
|
|
584
|
+
GGML_UNARY_OP_SOFTPLUS,
|
|
579
585
|
GGML_UNARY_OP_GELU_ERF,
|
|
580
586
|
GGML_UNARY_OP_XIELU,
|
|
581
587
|
GGML_UNARY_OP_FLOOR,
|
|
@@ -620,6 +626,13 @@ extern "C" {
|
|
|
620
626
|
GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
|
|
621
627
|
};
|
|
622
628
|
|
|
629
|
+
enum ggml_tri_type {
|
|
630
|
+
GGML_TRI_TYPE_UPPER_DIAG = 0,
|
|
631
|
+
GGML_TRI_TYPE_UPPER = 1,
|
|
632
|
+
GGML_TRI_TYPE_LOWER_DIAG = 2,
|
|
633
|
+
GGML_TRI_TYPE_LOWER = 3
|
|
634
|
+
};
|
|
635
|
+
|
|
623
636
|
struct ggml_init_params {
|
|
624
637
|
// memory pool
|
|
625
638
|
size_t mem_size; // bytes
|
|
@@ -957,6 +970,22 @@ extern "C" {
|
|
|
957
970
|
struct ggml_context * ctx,
|
|
958
971
|
struct ggml_tensor * a);
|
|
959
972
|
|
|
973
|
+
GGML_API struct ggml_tensor * ggml_expm1(
|
|
974
|
+
struct ggml_context * ctx,
|
|
975
|
+
struct ggml_tensor * a);
|
|
976
|
+
|
|
977
|
+
GGML_API struct ggml_tensor * ggml_expm1_inplace(
|
|
978
|
+
struct ggml_context * ctx,
|
|
979
|
+
struct ggml_tensor * a);
|
|
980
|
+
|
|
981
|
+
GGML_API struct ggml_tensor * ggml_softplus(
|
|
982
|
+
struct ggml_context * ctx,
|
|
983
|
+
struct ggml_tensor * a);
|
|
984
|
+
|
|
985
|
+
GGML_API struct ggml_tensor * ggml_softplus_inplace(
|
|
986
|
+
struct ggml_context * ctx,
|
|
987
|
+
struct ggml_tensor * a);
|
|
988
|
+
|
|
960
989
|
GGML_API struct ggml_tensor * ggml_sin(
|
|
961
990
|
struct ggml_context * ctx,
|
|
962
991
|
struct ggml_tensor * a);
|
|
@@ -983,6 +1012,10 @@ extern "C" {
|
|
|
983
1012
|
struct ggml_context * ctx,
|
|
984
1013
|
struct ggml_tensor * a);
|
|
985
1014
|
|
|
1015
|
+
GGML_API struct ggml_tensor * ggml_cumsum(
|
|
1016
|
+
struct ggml_context * ctx,
|
|
1017
|
+
struct ggml_tensor * a);
|
|
1018
|
+
|
|
986
1019
|
// mean along rows
|
|
987
1020
|
GGML_API struct ggml_tensor * ggml_mean(
|
|
988
1021
|
struct ggml_context * ctx,
|
|
@@ -2187,6 +2220,23 @@ extern "C" {
|
|
|
2187
2220
|
int shift2,
|
|
2188
2221
|
int shift3);
|
|
2189
2222
|
|
|
2223
|
+
// Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
|
|
2224
|
+
// zeroes everywhere outside the masked area
|
|
2225
|
+
GGML_API struct ggml_tensor * ggml_tri(
|
|
2226
|
+
struct ggml_context * ctx,
|
|
2227
|
+
struct ggml_tensor * a,
|
|
2228
|
+
enum ggml_tri_type type);
|
|
2229
|
+
|
|
2230
|
+
// Fill tensor a with constant c
|
|
2231
|
+
GGML_API struct ggml_tensor * ggml_fill(
|
|
2232
|
+
struct ggml_context * ctx,
|
|
2233
|
+
struct ggml_tensor * a,
|
|
2234
|
+
float c);
|
|
2235
|
+
|
|
2236
|
+
GGML_API struct ggml_tensor * ggml_fill_inplace(
|
|
2237
|
+
struct ggml_context * ctx,
|
|
2238
|
+
struct ggml_tensor * a,
|
|
2239
|
+
float c);
|
|
2190
2240
|
|
|
2191
2241
|
// Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
|
|
2192
2242
|
// timesteps: [N,]
|
|
@@ -2356,6 +2406,27 @@ extern "C" {
|
|
|
2356
2406
|
struct ggml_tensor * b,
|
|
2357
2407
|
struct ggml_tensor * state);
|
|
2358
2408
|
|
|
2409
|
+
/* Solves a specific equation of the form Ax=B, where A is a triangular matrix
|
|
2410
|
+
* without zeroes on the diagonal (i.e. invertible).
|
|
2411
|
+
* B can have any number of columns, but must have the same number of rows as A
|
|
2412
|
+
* If A is [n, n] and B is [n, m], then the result will be [n, m] as well
|
|
2413
|
+
* Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
|
|
2414
|
+
* where n > 100 sparingly, pre-chunk if necessary.
|
|
2415
|
+
*
|
|
2416
|
+
* If left = false, solves xA=B instead
|
|
2417
|
+
* If lower = false, assumes upper triangular instead
|
|
2418
|
+
* If uni = true, assumes diagonal of A to be all ones (will override actual values)
|
|
2419
|
+
*
|
|
2420
|
+
* TODO: currently only lower, right, non-unitriangular variant is implemented
|
|
2421
|
+
*/
|
|
2422
|
+
GGML_API struct ggml_tensor * ggml_solve_tri(
|
|
2423
|
+
struct ggml_context * ctx,
|
|
2424
|
+
struct ggml_tensor * a,
|
|
2425
|
+
struct ggml_tensor * b,
|
|
2426
|
+
bool left,
|
|
2427
|
+
bool lower,
|
|
2428
|
+
bool uni);
|
|
2429
|
+
|
|
2359
2430
|
// custom operators
|
|
2360
2431
|
|
|
2361
2432
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
|
@@ -211,6 +211,11 @@ add_library(ggml-base
|
|
|
211
211
|
ggml-quants.h
|
|
212
212
|
gguf.cpp)
|
|
213
213
|
|
|
214
|
+
set_target_properties(ggml-base PROPERTIES
|
|
215
|
+
VERSION ${GGML_VERSION}
|
|
216
|
+
SOVERSION ${GGML_VERSION_MAJOR}
|
|
217
|
+
)
|
|
218
|
+
|
|
214
219
|
target_include_directories(ggml-base PRIVATE .)
|
|
215
220
|
if (GGML_BACKEND_DL)
|
|
216
221
|
target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
|
|
@@ -220,6 +225,11 @@ add_library(ggml
|
|
|
220
225
|
ggml-backend-reg.cpp)
|
|
221
226
|
add_library(ggml::ggml ALIAS ggml)
|
|
222
227
|
|
|
228
|
+
set_target_properties(ggml PROPERTIES
|
|
229
|
+
VERSION ${GGML_VERSION}
|
|
230
|
+
SOVERSION ${GGML_VERSION_MAJOR}
|
|
231
|
+
)
|
|
232
|
+
|
|
223
233
|
if (GGML_BACKEND_DIR)
|
|
224
234
|
if (NOT GGML_BACKEND_DL)
|
|
225
235
|
message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
|
|
@@ -259,6 +269,12 @@ function(ggml_add_backend_library backend)
|
|
|
259
269
|
target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
|
|
260
270
|
endif()
|
|
261
271
|
|
|
272
|
+
# Set versioning properties for all backend libraries
|
|
273
|
+
set_target_properties(${backend} PROPERTIES
|
|
274
|
+
VERSION ${GGML_VERSION}
|
|
275
|
+
SOVERSION ${GGML_VERSION_MAJOR}
|
|
276
|
+
)
|
|
277
|
+
|
|
262
278
|
if(NOT GGML_AVAILABLE_BACKENDS)
|
|
263
279
|
set(GGML_AVAILABLE_BACKENDS "${backend}"
|
|
264
280
|
CACHE INTERNAL "List of backends for cmake package")
|
|
@@ -126,25 +126,36 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
126
126
|
)
|
|
127
127
|
if (NOT ARM_MCPU_RESULT)
|
|
128
128
|
string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
|
|
129
|
+
string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")
|
|
130
|
+
|
|
131
|
+
# on some old GCC we need to read -march=
|
|
132
|
+
if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
|
|
133
|
+
set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
|
|
134
|
+
elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
|
|
135
|
+
set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
|
|
136
|
+
endif()
|
|
129
137
|
endif()
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
138
|
+
|
|
139
|
+
if ("${ARM_NATIVE_FLAG}" STREQUAL "")
|
|
140
|
+
set(ARM_NATIVE_FLAG -mcpu=native)
|
|
141
|
+
message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
|
|
142
|
+
else()
|
|
143
|
+
message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
|
|
133
144
|
endif()
|
|
134
145
|
|
|
135
146
|
include(CheckCXXSourceRuns)
|
|
136
147
|
|
|
137
148
|
function(check_arm_feature tag code)
|
|
138
149
|
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
|
139
|
-
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
|
|
150
|
+
set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
|
|
140
151
|
check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
|
|
141
152
|
if (GGML_MACHINE_SUPPORTS_${tag})
|
|
142
|
-
set(
|
|
153
|
+
set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}" PARENT_SCOPE)
|
|
143
154
|
else()
|
|
144
|
-
set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+no${tag}")
|
|
155
|
+
set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
|
|
145
156
|
check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
|
|
146
157
|
if (GGML_MACHINE_SUPPORTS_no${tag})
|
|
147
|
-
set(
|
|
158
|
+
set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}" PARENT_SCOPE)
|
|
148
159
|
endif()
|
|
149
160
|
endif()
|
|
150
161
|
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
|
|
@@ -155,7 +166,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
155
166
|
check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
|
|
156
167
|
check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
|
|
157
168
|
|
|
158
|
-
list(APPEND ARCH_FLAGS "${
|
|
169
|
+
list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
|
|
159
170
|
else()
|
|
160
171
|
if (GGML_CPU_ARM_ARCH)
|
|
161
172
|
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
|
|
@@ -579,6 +590,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
579
590
|
${KLEIDIAI_SRC}/kai/ukernels/
|
|
580
591
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/
|
|
581
592
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
|
|
593
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/
|
|
582
594
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
|
|
583
595
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
|
|
584
596
|
|
|
@@ -597,23 +609,34 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
597
609
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
|
|
598
610
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
|
|
599
611
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
|
|
600
|
-
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
|
|
612
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
|
|
613
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c
|
|
614
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)
|
|
601
615
|
|
|
602
616
|
if (NOT DOTPROD_ENABLED MATCHES -1)
|
|
603
617
|
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
604
618
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
|
|
605
619
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
|
|
606
|
-
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
|
|
620
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
|
|
621
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
|
|
622
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
|
|
623
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
|
|
607
624
|
endif()
|
|
608
625
|
|
|
609
626
|
if (NOT I8MM_ENABLED MATCHES -1)
|
|
610
|
-
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
627
|
+
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
628
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
|
|
629
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
|
|
611
630
|
endif()
|
|
612
631
|
|
|
613
632
|
if (NOT SME_ENABLED MATCHES -1)
|
|
614
633
|
list(APPEND GGML_KLEIDIAI_SOURCES
|
|
615
634
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
|
|
616
635
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
|
|
636
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
|
|
637
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
|
|
638
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
|
|
639
|
+
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
|
|
617
640
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
|
|
618
641
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
|
|
619
642
|
${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c
|