@fugood/llama.node 1.3.1 → 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CMakeLists.txt +4 -3
  2. package/package.json +14 -14
  3. package/scripts/llama.cpp.patch +6 -6
  4. package/src/llama.cpp/CMakeLists.txt +4 -0
  5. package/src/llama.cpp/common/CMakeLists.txt +6 -37
  6. package/src/llama.cpp/common/arg.cpp +7 -0
  7. package/src/llama.cpp/common/common.cpp +1 -5
  8. package/src/llama.cpp/common/common.h +2 -1
  9. package/src/llama.cpp/common/download.cpp +47 -29
  10. package/src/llama.cpp/common/log.cpp +6 -0
  11. package/src/llama.cpp/common/log.h +2 -0
  12. package/src/llama.cpp/ggml/include/ggml.h +71 -0
  13. package/src/llama.cpp/ggml/src/CMakeLists.txt +16 -0
  14. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +34 -11
  15. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +428 -26
  16. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +50 -16
  17. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +283 -0
  18. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +1 -0
  19. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +235 -34
  20. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -317
  21. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +4 -4
  22. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +95 -42
  23. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +16 -0
  24. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +2 -0
  25. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +17 -0
  26. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +10 -0
  27. package/src/llama.cpp/src/CMakeLists.txt +6 -0
  28. package/src/llama.cpp/src/llama-arch.cpp +32 -0
  29. package/src/llama.cpp/src/llama-arch.h +2 -0
  30. package/src/llama.cpp/src/llama-graph.cpp +2 -1
  31. package/src/llama.cpp/src/llama-memory-recurrent.cpp +4 -3
  32. package/src/llama.cpp/src/llama-model.cpp +102 -0
  33. package/src/llama.cpp/src/llama-model.h +2 -0
  34. package/src/llama.cpp/src/llama-sampling.cpp +10 -5
  35. package/src/llama.cpp/src/llama-vocab.cpp +16 -1
  36. package/src/llama.cpp/src/llama-vocab.h +1 -0
  37. package/src/llama.cpp/src/models/afmoe.cpp +187 -0
  38. package/src/llama.cpp/src/models/ernie4-5.cpp +4 -5
  39. package/src/llama.cpp/src/models/models.h +4 -0
  40. package/src/llama.cpp/src/models/openai-moe-iswa.cpp +2 -1
  41. package/src/llama.cpp/src/unicode.cpp +77 -0
package/CMakeLists.txt CHANGED
@@ -121,6 +121,8 @@ endif()
 
 set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
 
+set(LLAMA_BUILD_TOOLS ON CACHE BOOL "Build tools")
+
 set(LLAMA_CURL OFF CACHE BOOL "Build curl")
 
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
@@ -128,7 +130,6 @@ set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_definitions(-DGGML_MAX_NAME=80)
 
 add_subdirectory("src/llama.cpp")
-add_subdirectory("src/llama.cpp/tools/mtmd")
 
 include_directories(
   ${CMAKE_JS_INC}
@@ -172,7 +173,7 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
 
   add_library(win_dynamic_load ${WIN_DYNAMIC_LOAD_SRC})
   set_target_properties(win_dynamic_load PROPERTIES COMPILE_FLAGS "-Wno-implicit-function-declaration")
-
+
 unset(CMAKE_JS_SRC)
 unset(CMAKE_JS_LIB)
 unset(CMAKE_JS_NODELIB_DEF)
@@ -207,7 +208,7 @@ if(CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
 endif()
 
 if (GGML_METAL AND NOT GGML_METAL_EMBED_LIBRARY)
-  # copy ${CMAKE_BINARY_DIR}/bin/default.metallib
+  # copy ${CMAKE_BINARY_DIR}/bin/default.metallib
   add_custom_command(
     TARGET copy_assets
     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/bin/default.metallib ${METAL_LIB_TARGET_PATH}
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.1",
+  "version": "1.3.3",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.1",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.1",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.1",
-    "@fugood/node-llama-linux-arm64": "1.3.1",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.1",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.1",
-    "@fugood/node-llama-win32-x64": "1.3.1",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.1",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.1",
-    "@fugood/node-llama-win32-arm64": "1.3.1",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.1",
-    "@fugood/node-llama-darwin-x64": "1.3.1",
-    "@fugood/node-llama-darwin-arm64": "1.3.1"
+    "@fugood/node-llama-linux-x64": "1.3.3",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.3",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.3",
+    "@fugood/node-llama-linux-arm64": "1.3.3",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.3",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.3",
+    "@fugood/node-llama-win32-x64": "1.3.3",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.3",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.3",
+    "@fugood/node-llama-win32-arm64": "1.3.3",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.3",
+    "@fugood/node-llama-darwin-x64": "1.3.3",
+    "@fugood/node-llama-darwin-arm64": "1.3.3"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch CHANGED
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index fe290bf8f..d377e29b9 100644
+index 706fa32ee..248459903 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -170,9 +170,16 @@ if (LLAMA_LLGUIDANCE)
+@@ -141,9 +141,16 @@ if (LLAMA_LLGUIDANCE)
      set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()
  
@@ -85,10 +85,10 @@ index 50efb0d4e..f471a84c7 100644
  struct common_chat_tool_call {
      std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index b0591e84b..93759f884 100644
+index 4dc95dcba..ea0ea86c0 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -1126,6 +1126,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+@@ -1155,6 +1155,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
      mparams.n_gpu_layers = params.n_gpu_layers;
  }
  
@@ -97,7 +97,7 @@ index b0591e84b..93759f884 100644
      mparams.split_mode = params.split_mode;
      mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index a8cb630ea..0919ec5d3 100644
+index f42c083fa..c573cc812 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
 @@ -274,6 +274,7 @@ struct lr_opt {
@@ -109,7 +109,7 @@ index a8cb630ea..0919ec5d3 100644
      int32_t n_ctx = 4096; // context size
      int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index 23ec8bb08..33c93cba7 100644
+index e52e050a8..c1000c162 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
package/src/llama.cpp/CMakeLists.txt CHANGED
@@ -92,6 +92,7 @@ option(LLAMA_TOOLS_INSTALL "llama: install tools" ${LLAMA_TOOLS_INSTALL_
 
 # 3rd party libs
 option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
+option(LLAMA_HTTPLIB "llama: if libcurl is disabled, use httplib to download model from an URL" ON)
 option(LLAMA_OPENSSL "llama: use openssl to support HTTPS" OFF)
 option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
 
@@ -200,6 +201,9 @@ endif()
 
 if (LLAMA_BUILD_COMMON)
     add_subdirectory(common)
+    if (LLAMA_HTTPLIB)
+        add_subdirectory(vendor/cpp-httplib)
+    endif()
 endif()
 
 if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
package/src/llama.cpp/common/CMakeLists.txt CHANGED
@@ -79,10 +79,11 @@ if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
 
+# TODO: use list(APPEND LLAMA_COMMON_EXTRA_LIBS ...)
 set(LLAMA_COMMON_EXTRA_LIBS build_info)
 
-# Use curl to download model url
 if (LLAMA_CURL)
+    # Use curl to download model url
     find_package(CURL)
     if (NOT CURL_FOUND)
         message(FATAL_ERROR "Could NOT find CURL. Hint: to disable this feature, set -DLLAMA_CURL=OFF")
@@ -90,42 +91,10 @@ if (LLAMA_CURL)
     target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
     include_directories(${CURL_INCLUDE_DIRS})
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES})
-endif()
-
-if (LLAMA_OPENSSL)
-    find_package(OpenSSL)
-    if (OpenSSL_FOUND)
-        include(CheckCSourceCompiles)
-        set(SAVED_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
-        set(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
-        check_c_source_compiles("
-            #include <openssl/opensslv.h>
-            #if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER)
-            #  if OPENSSL_VERSION_NUMBER < 0x1010107f
-            #    error bad version
-            #  endif
-            #else
-            #  if OPENSSL_VERSION_NUMBER < 0x30000000L
-            #    error bad version
-            #  endif
-            #endif
-            int main() { return 0; }
-        " OPENSSL_VERSION_SUPPORTED)
-        set(CMAKE_REQUIRED_INCLUDES ${SAVED_CMAKE_REQUIRED_INCLUDES})
-        if (OPENSSL_VERSION_SUPPORTED)
-            message(STATUS "OpenSSL found: ${OPENSSL_VERSION}")
-            target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT)
-            target_link_libraries(${TARGET} PUBLIC OpenSSL::SSL OpenSSL::Crypto)
-            if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-                target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_USE_CERTS_FROM_MACOSX_KEYCHAIN)
-                find_library(CORE_FOUNDATION_FRAMEWORK CoreFoundation REQUIRED)
-                find_library(SECURITY_FRAMEWORK Security REQUIRED)
-                target_link_libraries(${TARGET} PUBLIC ${CORE_FOUNDATION_FRAMEWORK} ${SECURITY_FRAMEWORK})
-            endif()
-        endif()
-    else()
-        message(STATUS "OpenSSL not found, SSL support disabled")
-    endif()
+elseif (LLAMA_HTTPLIB)
+    # otherwise, use cpp-httplib
+    target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_HTTPLIB)
+    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} cpp-httplib)
 endif()
 
 if (LLAMA_LLGUIDANCE)
package/src/llama.cpp/common/arg.cpp CHANGED
@@ -2253,6 +2253,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.is_pp_shared = true;
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
+    add_opt(common_arg(
+        {"-tgs"},
+        string_format("is the text generation separated across the different sequences (default: %s)", params.is_tg_separate ? "true" : "false"),
+        [](common_params & params) {
+            params.is_tg_separate = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_BENCH, LLAMA_EXAMPLE_PARALLEL}));
     add_opt(common_arg(
         {"-npp"}, "n0,n1,...",
         "number of prompt tokens",
package/src/llama.cpp/common/common.cpp CHANGED
@@ -355,11 +355,7 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREAD
 }
 
 void common_init() {
-    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
-        if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
-            common_log_add(common_log_main(), level, "%s", text);
-        }
-    }, NULL);
+    llama_log_set(common_log_default_callback, NULL);
 
 #ifdef NDEBUG
     const char * build_type = "";
package/src/llama.cpp/common/common.h CHANGED
@@ -461,7 +461,8 @@ struct common_params {
     float slot_prompt_similarity = 0.1f;
 
     // batched-bench params
-    bool is_pp_shared = false;
+    bool is_pp_shared = false;
+    bool is_tg_separate = false;
 
     std::vector<int32_t> n_pp;
     std::vector<int32_t> n_tg;
package/src/llama.cpp/common/download.cpp CHANGED
@@ -20,7 +20,7 @@
 #if defined(LLAMA_USE_CURL)
 #include <curl/curl.h>
 #include <curl/easy.h>
-#else
+#elif defined(LLAMA_USE_HTTPLIB)
 #include "http.h"
 #endif
 
@@ -467,7 +467,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
     return { res_code, std::move(res_buffer) };
 }
 
-#else
+#elif defined(LLAMA_USE_HTTPLIB)
 
 static bool is_output_a_tty() {
 #if defined(_WIN32)
@@ -713,6 +713,8 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
 
 #endif // LLAMA_USE_CURL
 
+#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
+
 static bool common_download_file_single(const std::string & url,
                                         const std::string & path,
                                         const std::string & bearer_token,
@@ -907,33 +909,6 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
     return { hf_repo, ggufFile, mmprojFile };
 }
 
-std::vector<common_cached_model_info> common_list_cached_models() {
-    std::vector<common_cached_model_info> models;
-    const std::string cache_dir = fs_get_cache_directory();
-    const std::vector<common_file_info> files = fs_list_files(cache_dir);
-    for (const auto & file : files) {
-        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
-            common_cached_model_info model_info;
-            model_info.manifest_path = file.path;
-            std::string fname = file.name;
-            string_replace_all(fname, ".json", ""); // remove extension
-            auto parts = string_split<std::string>(fname, '=');
-            if (parts.size() == 4) {
-                // expect format: manifest=<user>=<model>=<tag>=<other>
-                model_info.user = parts[1];
-                model_info.model = parts[2];
-                model_info.tag = parts[3];
-            } else {
-                // invalid format
-                continue;
-            }
-            model_info.size = 0; // TODO: get GGUF size, not manifest size
-            models.push_back(model_info);
-        }
-    }
-    return models;
-}
-
 //
 // Docker registry functions
 //
@@ -1052,3 +1027,46 @@ std::string common_docker_resolve_model(const std::string & docker) {
         throw;
     }
 }
+
+#else
+
+common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+bool common_download_model(const common_params_model &, const std::string &, bool) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+std::string common_docker_resolve_model(const std::string &) {
+    throw std::runtime_error("download functionality is not enabled in this build");
+}
+
+#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
+
+std::vector<common_cached_model_info> common_list_cached_models() {
+    std::vector<common_cached_model_info> models;
+    const std::string cache_dir = fs_get_cache_directory();
+    const std::vector<common_file_info> files = fs_list_files(cache_dir);
+    for (const auto & file : files) {
+        if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
+            common_cached_model_info model_info;
+            model_info.manifest_path = file.path;
+            std::string fname = file.name;
+            string_replace_all(fname, ".json", ""); // remove extension
+            auto parts = string_split<std::string>(fname, '=');
+            if (parts.size() == 4) {
+                // expect format: manifest=<user>=<model>=<tag>=<other>
+                model_info.user = parts[1];
+                model_info.model = parts[2];
+                model_info.tag = parts[3];
+            } else {
+                // invalid format
+                continue;
+            }
+            model_info.size = 0; // TODO: get GGUF size, not manifest size
+            models.push_back(model_info);
+        }
+    }
+    return models;
+}
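To make the manifest naming above concrete: a hypothetical cache entry named manifest=some-user=some-model-GGUF=Q4_K_M.json (illustrative, not taken from the package) is stripped of its .json extension and split on '=', giving the four parts "manifest", "some-user", "some-model-GGUF", and "Q4_K_M", which common_list_cached_models stores as user, model, and tag respectively. Filenames that do not split into exactly four parts are skipped as invalid.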
package/src/llama.cpp/common/log.cpp CHANGED
@@ -442,3 +442,9 @@ void common_log_set_prefix(struct common_log * log, bool prefix) {
 void common_log_set_timestamps(struct common_log * log, bool timestamps) {
     log->set_timestamps(timestamps);
 }
+
+void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
+    if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
+        common_log_add(common_log_main(), level, "%s", text);
+    }
+}
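The change above lifts the inline lambda out of common_init() into a named, public callback. A minimal sketch of what this enables (the custom sink and calling code below are hypothetical, not part of the package):

    #include <stdio.h>
    #include "llama.h"
    #include "log.h"  // common/log.h, declares common_log_default_callback

    // hypothetical sink that bypasses common_log and writes straight to stderr
    static void my_stderr_sink(enum ggml_log_level level, const char * text, void * user_data) {
        (void) level; (void) user_data;
        fputs(text, stderr);
    }

    static void demo(void) {
        llama_log_set(my_stderr_sink, NULL);               // divert llama.cpp logs
        // ... run some llama.cpp calls ...
        llama_log_set(common_log_default_callback, NULL);  // restore the stock routing
    }

Previously the default routing only existed as an anonymous lambda inside common_init(), so callers could not restore it after installing their own callback.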
package/src/llama.cpp/common/log.h CHANGED
@@ -36,6 +36,8 @@ extern int common_log_verbosity_thold
 
 void common_log_set_verbosity_thold(int verbosity); // not thread-safe
 
+void common_log_default_callback(enum ggml_log_level level, const char * text, void * user_data);
+
 // the common_log uses an internal worker thread to print/write log messages
 // when the worker thread is paused, incoming log messages are discarded
 struct common_log;
package/src/llama.cpp/ggml/include/ggml.h CHANGED
@@ -475,6 +475,7 @@ extern "C" {
         GGML_OP_COS,
         GGML_OP_SUM,
         GGML_OP_SUM_ROWS,
+        GGML_OP_CUMSUM,
         GGML_OP_MEAN,
         GGML_OP_ARGMAX,
         GGML_OP_COUNT_EQUAL,
@@ -530,6 +531,8 @@ extern "C" {
         GGML_OP_TIMESTEP_EMBEDDING,
         GGML_OP_ARGSORT,
         GGML_OP_LEAKY_RELU,
+        GGML_OP_TRI,
+        GGML_OP_FILL,
 
         GGML_OP_FLASH_ATTN_EXT,
         GGML_OP_FLASH_ATTN_BACK,
@@ -542,6 +545,7 @@ extern "C" {
         GGML_OP_RWKV_WKV6,
         GGML_OP_GATED_LINEAR_ATTN,
         GGML_OP_RWKV_WKV7,
+        GGML_OP_SOLVE_TRI,
 
         GGML_OP_UNARY,
 
@@ -576,6 +580,8 @@ extern "C" {
         GGML_UNARY_OP_HARDSWISH,
         GGML_UNARY_OP_HARDSIGMOID,
         GGML_UNARY_OP_EXP,
+        GGML_UNARY_OP_EXPM1,
+        GGML_UNARY_OP_SOFTPLUS,
         GGML_UNARY_OP_GELU_ERF,
         GGML_UNARY_OP_XIELU,
         GGML_UNARY_OP_FLOOR,
@@ -620,6 +626,13 @@ extern "C" {
         GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
     };
 
+    enum ggml_tri_type {
+        GGML_TRI_TYPE_UPPER_DIAG = 0,
+        GGML_TRI_TYPE_UPPER      = 1,
+        GGML_TRI_TYPE_LOWER_DIAG = 2,
+        GGML_TRI_TYPE_LOWER      = 3
+    };
+
     struct ggml_init_params {
         // memory pool
         size_t mem_size; // bytes
@@ -957,6 +970,22 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_expm1(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_expm1_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_softplus(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_softplus_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_sin(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
@@ -983,6 +1012,10 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_cumsum(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // mean along rows
     GGML_API struct ggml_tensor * ggml_mean(
             struct ggml_context * ctx,
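The declarations above are all the header gives us; as a rough usage sketch (mine, not from the package), the new unary ops compose with ggml_cumsum like any other graph node. This assumes the CPU backend and the ggml_graph_compute_with_ctx helper from ggml-cpu.h, and that ggml_cumsum accumulates along each row, by analogy with ggml_sum_rows:

    #include <stdio.h>
    #include "ggml.h"
    #include "ggml-cpu.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        for (int i = 0; i < 4; ++i) {
            ((float *) x->data)[i] = i - 1.5f;   // {-1.5, -0.5, 0.5, 1.5}
        }

        // running total of softplus(x) = log(1 + exp(x)), assumed along the row
        struct ggml_tensor * y = ggml_cumsum(ctx, ggml_softplus(ctx, x));

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, y);
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

        for (int i = 0; i < 4; ++i) {
            printf("y[%d] = %f\n", i, ((float *) y->data)[i]);
        }

        ggml_free(ctx);
        return 0;
    }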
@@ -2187,6 +2220,23 @@ extern "C" {
             int                   shift2,
             int                   shift3);
 
+    // Convert matrix into a triangular one (upper, strict upper, lower or strict lower) by writing
+    // zeroes everywhere outside the masked area
+    GGML_API struct ggml_tensor * ggml_tri(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            enum ggml_tri_type    type);
+
+    // Fill tensor a with constant c
+    GGML_API struct ggml_tensor * ggml_fill(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 c);
+
+    GGML_API struct ggml_tensor * ggml_fill_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            float                 c);
 
     // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
     // timesteps: [N,]
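Following the header comments above, a small follow-on fragment (same hypothetical setup as the previous sketch; my reading of the enum names is that the _DIAG variants keep the diagonal, matching the "upper, strict upper, lower or strict lower" wording):

    // 4x4 lower-triangular matrix of ones, diagonal included (assumed semantics):
    // ggml_fill writes the constant everywhere, ggml_tri zeroes outside the mask
    struct ggml_tensor * m    = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);
    struct ggml_tensor * ones = ggml_fill(ctx, m, 1.0f);
    struct ggml_tensor * tril = ggml_tri(ctx, ones, GGML_TRI_TYPE_LOWER_DIAG);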
@@ -2356,6 +2406,27 @@ extern "C" {
             struct ggml_tensor  * b,
             struct ggml_tensor  * state);
 
+    /* Solves a specific equation of the form Ax=B, where A is a triangular matrix
+     * without zeroes on the diagonal (i.e. invertible).
+     * B can have any number of columns, but must have the same number of rows as A
+     * If A is [n, n] and B is [n, m], then the result will be [n, m] as well
+     * Has O(n^3) complexity (unlike most matrix ops out there), so use on cases
+     * where n > 100 sparingly, pre-chunk if necessary.
+     *
+     * If left = false, solves xA=B instead
+     * If lower = false, assumes upper triangular instead
+     * If uni = true, assumes diagonal of A to be all ones (will override actual values)
+     *
+     * TODO: currently only lower, right, non-unitriangular variant is implemented
+     */
+    GGML_API struct ggml_tensor * ggml_solve_tri(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            bool                  left,
+            bool                  lower,
+            bool                  uni);
+
     // custom operators
 
     typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
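And a matching fragment for the solver (again hypothetical, continuing the earlier setup). The flag values follow the TODO above, which says only the lower, right, non-unitriangular variant is implemented; B is made square here (m = n) to sidestep how the header's [n, m] maps onto ggml's ne ordering:

    // A: 3x3 lower-triangular with a non-zero diagonal, B: 3x3; solve for x
    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 3);
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 3);
    // ... fill A and B through ->data as in the first sketch ...
    struct ggml_tensor * X = ggml_solve_tri(ctx, A, B,
            /*left  =*/ false,  // the "right" variant the TODO notes as implemented
            /*lower =*/ true,
            /*uni   =*/ false);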
package/src/llama.cpp/ggml/src/CMakeLists.txt CHANGED
@@ -211,6 +211,11 @@ add_library(ggml-base
     ggml-quants.h
     gguf.cpp)
 
+set_target_properties(ggml-base PROPERTIES
+    VERSION ${GGML_VERSION}
+    SOVERSION ${GGML_VERSION_MAJOR}
+)
+
 target_include_directories(ggml-base PRIVATE .)
 if (GGML_BACKEND_DL)
     target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
@@ -220,6 +225,11 @@ add_library(ggml
     ggml-backend-reg.cpp)
 add_library(ggml::ggml ALIAS ggml)
 
+set_target_properties(ggml PROPERTIES
+    VERSION ${GGML_VERSION}
+    SOVERSION ${GGML_VERSION_MAJOR}
+)
+
 if (GGML_BACKEND_DIR)
     if (NOT GGML_BACKEND_DL)
         message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
@@ -259,6 +269,12 @@ function(ggml_add_backend_library backend)
         target_compile_definitions(${backend} PUBLIC GGML_BACKEND_SHARED)
     endif()
 
+    # Set versioning properties for all backend libraries
+    set_target_properties(${backend} PROPERTIES
+        VERSION ${GGML_VERSION}
+        SOVERSION ${GGML_VERSION_MAJOR}
+    )
+
     if(NOT GGML_AVAILABLE_BACKENDS)
         set(GGML_AVAILABLE_BACKENDS "${backend}"
             CACHE INTERNAL "List of backends for cmake package")
package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt CHANGED
@@ -126,25 +126,36 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
         )
         if (NOT ARM_MCPU_RESULT)
             string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
+            string(REGEX MATCH "-march=[^ ']+" ARM_MARCH_FLAG "${ARM_MCPU}")
+
+            # on some old GCC we need to read -march=
+            if (ARM_MARCH_FLAG AND NOT "${ARM_MARCH_FLAG}" STREQUAL "-march=native")
+                set(ARM_NATIVE_FLAG "${ARM_MARCH_FLAG}")
+            elseif(ARM_MCPU_FLAG AND NOT "${ARM_MCPU_FLAG}" STREQUAL "-mcpu=native")
+                set(ARM_NATIVE_FLAG "${ARM_MCPU_FLAG}")
+            endif()
         endif()
-        if ("${ARM_MCPU_FLAG}" STREQUAL "")
-            set(ARM_MCPU_FLAG -mcpu=native)
-            message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
+
+        if ("${ARM_NATIVE_FLAG}" STREQUAL "")
+            set(ARM_NATIVE_FLAG -mcpu=native)
+            message(WARNING "ARM -march/-mcpu not found, -mcpu=native will be used")
+        else()
+            message(STATUS "ARM detected flags: ${ARM_NATIVE_FLAG}")
         endif()
 
         include(CheckCXXSourceRuns)
 
         function(check_arm_feature tag code)
            set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
-           set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
+           set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+${tag}")
            check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
            if (GGML_MACHINE_SUPPORTS_${tag})
-               set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
+               set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+${tag}" PARENT_SCOPE)
            else()
-               set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+no${tag}")
+               set(CMAKE_REQUIRED_FLAGS "${ARM_NATIVE_FLAG}+no${tag}")
                check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
                if (GGML_MACHINE_SUPPORTS_no${tag})
-                   set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
+                   set(ARM_NATIVE_FLAG_FIX "${ARM_NATIVE_FLAG_FIX}+no${tag}" PARENT_SCOPE)
                endif()
            endif()
            set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
@@ -155,7 +166,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
        check_arm_feature(sve "#include <arm_sve.h>\nint main() { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
        check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")
 
-       list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
+       list(APPEND ARCH_FLAGS "${ARM_NATIVE_FLAG}${ARM_NATIVE_FLAG_FIX}")
     else()
        if (GGML_CPU_ARM_ARCH)
            list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
@@ -579,6 +590,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
            ${KLEIDIAI_SRC}/kai/ukernels/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
+           ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
 
@@ -597,23 +609,34 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
            ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
-           ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)
+           ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c
+           ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qai8dxp_f32.c
+           ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi8cxp_qsi8cx_neon.c)
 
        if (NOT DOTPROD_ENABLED MATCHES -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
-               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c)
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x4_qsi8cxp4x4_16x4_neon_dotprod.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4x4_1x4_neon_dotprod.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x8_qsi8cxp4x8_1x4_neon_dotprod.c)
        endif()
 
        if (NOT I8MM_ENABLED MATCHES -1)
-           list(APPEND GGML_KLEIDIAI_SOURCES ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c)
+           list(APPEND GGML_KLEIDIAI_SOURCES
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp4x8_qsi8cxp4x8_16x4_neon_i8mm.c)
        endif()
 
        if (NOT SME_ENABLED MATCHES -1)
            list(APPEND GGML_KLEIDIAI_SOURCES
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1vlx4_qsi8cxp4vlx4_1vlx4vl_sme2_mopa_asm.S
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot.c
+               ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qai8dxp_qsi8cxp/kai_matmul_clamp_f32_qai8dxp1x4_qsi8cxp4vlx4_1x4vl_sme2_dot_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa.c
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_fp32_bf16p_bf16p/kai_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa_asm.S
                ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_pack_bf16p2vlx2_f32_sme.c