@fugood/llama.node 1.3.7 → 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.3.7",
+ "version": "1.3.8",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,20 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.3.7",
- "@fugood/node-llama-linux-x64-vulkan": "1.3.7",
- "@fugood/node-llama-linux-x64-cuda": "1.3.7",
- "@fugood/node-llama-linux-arm64-snapdragon": "1.3.7",
- "@fugood/node-llama-linux-arm64": "1.3.7",
- "@fugood/node-llama-linux-arm64-vulkan": "1.3.7",
- "@fugood/node-llama-linux-arm64-cuda": "1.3.7",
- "@fugood/node-llama-win32-x64": "1.3.7",
- "@fugood/node-llama-win32-x64-vulkan": "1.3.7",
- "@fugood/node-llama-win32-x64-cuda": "1.3.7",
- "@fugood/node-llama-win32-arm64": "1.3.7",
- "@fugood/node-llama-win32-arm64-vulkan": "1.3.7",
- "@fugood/node-llama-darwin-x64": "1.3.7",
- "@fugood/node-llama-darwin-arm64": "1.3.7"
+ "@fugood/node-llama-linux-x64": "1.3.8",
+ "@fugood/node-llama-linux-x64-vulkan": "1.3.8",
+ "@fugood/node-llama-linux-x64-cuda": "1.3.8",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.3.8",
+ "@fugood/node-llama-linux-arm64": "1.3.8",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
+ "@fugood/node-llama-linux-arm64-cuda": "1.3.8",
+ "@fugood/node-llama-win32-x64": "1.3.8",
+ "@fugood/node-llama-win32-x64-vulkan": "1.3.8",
+ "@fugood/node-llama-win32-x64-cuda": "1.3.8",
+ "@fugood/node-llama-win32-arm64": "1.3.8",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.3.8",
+ "@fugood/node-llama-darwin-x64": "1.3.8",
+ "@fugood/node-llama-darwin-arm64": "1.3.8"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
@@ -85,10 +85,10 @@ index 754c411e2..71241a6cc 100644
  struct common_chat_tool_call {
  std::string name;
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
- index f3cc55247..65398844f 100644
+ index 0d7fd9a93..6bf3cc7ab 100644
  --- a/src/llama.cpp/common/common.cpp
  +++ b/src/llama.cpp/common/common.cpp
- @@ -1162,6 +1162,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
+ @@ -1217,6 +1217,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
  mparams.n_gpu_layers = params.n_gpu_layers;
  }

@@ -97,10 +97,10 @@ index f3cc55247..65398844f 100644
  mparams.split_mode = params.split_mode;
  mparams.tensor_split = params.tensor_split;
  diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
- index de5b404dd..d30d252c9 100644
+ index 2f23d0baa..e4e6c795e 100644
  --- a/src/llama.cpp/common/common.h
  +++ b/src/llama.cpp/common/common.h
- @@ -281,6 +281,7 @@ struct lr_opt {
+ @@ -299,6 +299,7 @@ struct lr_opt {
  struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);

  struct common_params {
@@ -109,7 +109,7 @@ index de5b404dd..d30d252c9 100644
  int32_t n_ctx = 4096; // context size
  int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
  diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
- index d0cab0bcb..48d532838 100644
+ index 7e53a57b7..a328d4db4 100644
  --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
  @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
@@ -694,6 +694,12 @@ static bool is_autoy(const std::string & value) {
  }

  common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
+ // default values specific to example
+ // note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
+ if (ex == LLAMA_EXAMPLE_SERVER) {
+ params.use_jinja = true;
+ }
+
  // load dynamic backends
  ggml_backend_load_all();

@@ -1232,6 +1238,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params, const std::string & value) {
  const auto sampler_names = string_split<std::string>(value, ';');
  params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1261,6 +1268,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  [](common_params & params, const std::string & value) {
  params.sampling.temp = std::stof(value);
  params.sampling.temp = std::max(params.sampling.temp, 0.0f);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1268,6 +1276,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k),
  [](common_params & params, int value) {
  params.sampling.top_k = value;
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1275,6 +1284,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p),
  [](common_params & params, const std::string & value) {
  params.sampling.top_p = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1282,6 +1292,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p),
  [](common_params & params, const std::string & value) {
  params.sampling.min_p = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1296,6 +1307,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
  [](common_params & params, const std::string & value) {
  params.sampling.xtc_probability = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1303,6 +1315,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
  [](common_params & params, const std::string & value) {
  params.sampling.xtc_threshold = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1321,6 +1334,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  }
  params.sampling.penalty_last_n = value;
  params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1328,6 +1342,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
  [](common_params & params, const std::string & value) {
  params.sampling.penalty_repeat = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1425,6 +1440,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat),
  [](common_params & params, int value) {
  params.sampling.mirostat = value;
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1432,6 +1448,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta),
  [](common_params & params, const std::string & value) {
  params.sampling.mirostat_eta = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -1439,6 +1456,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau),
  [](common_params & params, const std::string & value) {
  params.sampling.mirostat_tau = std::stof(value);
+ params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
  }
  ).set_sparam());
  add_opt(common_arg(
@@ -2476,11 +2494,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  ).set_examples({LLAMA_EXAMPLE_SERVER}));
  add_opt(common_arg(
  {"--jinja"},
- "use jinja template for chat (default: disabled)",
+ string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
  [](common_params & params) {
  params.use_jinja = true;
  }
  ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
+ add_opt(common_arg(
+ {"--no-jinja"},
+ string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
+ [](common_params & params) {
+ params.use_jinja = false;
+ }
+ ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_NO_JINJA"));
  add_opt(common_arg(
  {"--reasoning-format"}, "FORMAT",
  "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
@@ -8,6 +8,7 @@
  #include "common.h"
  #include "log.h"
  #include "llama.h"
+ #include "sampling.h"

  #include <algorithm>
  #include <cinttypes>
@@ -949,6 +950,58 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
  // Model utils
  //

+ static inline void common_init_sampler_from_model(
+ const llama_model * model,
+ common_params_sampling & sparams) {
+
+ const uint64_t config = sparams.user_sampling_config;
+
+ auto get_int32 = [&](const char * key, int32_t & dst, uint64_t user_config) {
+ if (config & user_config) return;
+
+ char buf[64] = {0};
+ if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
+ char * end = nullptr;
+ int32_t v = strtol(buf, &end, 10);
+ if (end && end != buf) dst = v;
+ }
+ };
+
+ auto get_float = [&](const char * key, float & dst, uint64_t user_config) {
+ if (config & user_config) return;
+
+ char buf[128] = {0};
+ if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
+ char * end = nullptr;
+ float v = strtof(buf, &end);
+ if (end && end != buf) dst = v;
+ }
+ };
+
+ // Sampling sequence
+ if (!(config & common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS)) {
+ char buf[512] = {0};
+ if (llama_model_meta_val_str(model, llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE), buf, sizeof(buf)) > 0) {
+ const std::vector<std::string> sampler_names = string_split<std::string>(std::string(buf), ';');
+ if (!sampler_names.empty()) {
+ sparams.samplers = common_sampler_types_from_names(sampler_names, true);
+ }
+ }
+ }
+
+ get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_K), sparams.top_k, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_P), sparams.top_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIN_P), sparams.min_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY), sparams.xtc_probability, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD), sparams.xtc_threshold, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TEMP), sparams.temp, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP);
+ get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N), sparams.penalty_last_n, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT), sparams.penalty_repeat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT);
+ get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT), sparams.mirostat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU), sparams.mirostat_tau, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU);
+ get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA), sparams.mirostat_eta, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA);
+ }
+
  struct common_init_result common_init_from_params(common_params & params) {
  common_init_result iparams;
  auto mparams = common_model_params_to_llama(params);
@@ -960,6 +1013,8 @@ struct common_init_result common_init_from_params(common_params & params) {
  return iparams;
  }

+ common_init_sampler_from_model(model, params.sampling);
+
  const llama_vocab * vocab = llama_model_get_vocab(model);

  auto cparams = common_context_params_to_llama(params);
@@ -140,6 +140,22 @@ struct common_grammar_trigger {
  llama_token token = LLAMA_TOKEN_NULL;
  };

+ enum common_params_sampling_config : uint64_t {
+ COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS = 1 << 0,
+ COMMON_PARAMS_SAMPLING_CONFIG_TOP_K = 1 << 1,
+ COMMON_PARAMS_SAMPLING_CONFIG_TOP_P = 1 << 2,
+ COMMON_PARAMS_SAMPLING_CONFIG_MIN_P = 1 << 3,
+ COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY = 1 << 4,
+ COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD = 1 << 5,
+ COMMON_PARAMS_SAMPLING_CONFIG_TEMP = 1 << 6,
+ COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N = 1 << 7,
+ COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT = 1 << 8,
+ COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT = 1 << 9,
+ COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU = 1 << 10,
+ COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA = 1 << 11,
+ };
+
+
  // sampling parameters
  struct common_params_sampling {
  uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
@@ -172,6 +188,8 @@ struct common_params_sampling {
  bool no_perf = false; // disable performance metrics
  bool timing_per_token = false;

+ uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers
+
  std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY

 
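The pieces above fit together as a precedence scheme: each CLI sampling handler sets a bit in `user_sampling_config`, and `common_init_sampler_from_model` skips any field whose bit is set, so sampling defaults embedded in the model's GGUF metadata never override explicit flags. A reduced, self-contained sketch of that pattern (simplified hypothetical names, not the actual llama.cpp code):

```cpp
#include <cstdint>
#include <cstdio>

// One bit per sampling field that the user can set explicitly.
enum sampling_config_bit : uint64_t {
    CONFIG_TOP_K = 1 << 0,
};

struct sampling_params {
    int32_t  top_k       = 40; // compiled-in default
    uint64_t user_config = 0;  // bits set by CLI flag handlers
};

// CLI side: apply the flag and mark the field as user-specified
void cli_set_top_k(sampling_params & p, int32_t v) {
    p.top_k        = v;
    p.user_config |= CONFIG_TOP_K;
}

// model-load side: a metadata value applies only when the bit is clear
void apply_model_top_k(sampling_params & p, int32_t model_v) {
    if (!(p.user_config & CONFIG_TOP_K)) {
        p.top_k = model_v;
    }
}

int main() {
    sampling_params p;
    cli_set_top_k(p, 20);      // user passed --top-k 20
    apply_model_top_k(p, 64);  // model metadata suggests top_k = 64
    std::printf("top_k = %d\n", (int) p.top_k); // 20: the user flag wins
    return 0;
}
```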
@@ -25,16 +25,17 @@ if(GIT_EXE)
  )
  endif()

- # Build the version string with optional dirty flag
  set(GGML_VERSION "${GGML_VERSION_BASE}")
- if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
- set(GGML_VERSION "${GGML_VERSION}-dirty")
- endif()

  if(NOT GGML_BUILD_COMMIT)
  set(GGML_BUILD_COMMIT "unknown")
  endif()

+ # Build the commit string with optional dirty flag
+ if(DEFINED GGML_GIT_DIRTY AND GGML_GIT_DIRTY EQUAL 1)
+ set(GGML_BUILD_COMMIT "${GGML_BUILD_COMMIT}-dirty")
+ endif()
+
  include(CheckIncludeFileCXX)

  set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -530,6 +530,7 @@ extern "C" {
  GGML_OP_ARANGE,
  GGML_OP_TIMESTEP_EMBEDDING,
  GGML_OP_ARGSORT,
+ GGML_OP_TOP_K,
  GGML_OP_LEAKY_RELU,
  GGML_OP_TRI,
  GGML_OP_FILL,
@@ -2258,18 +2259,25 @@ extern "C" {
  struct ggml_tensor * a,
  enum ggml_sort_order order);

- GGML_API struct ggml_tensor * ggml_arange(
+ // similar to ggml_top_k but implemented as `argsort` + `view`
+ GGML_API struct ggml_tensor * ggml_argsort_top_k(
  struct ggml_context * ctx,
- float start,
- float stop,
- float step);
+ struct ggml_tensor * a,
+ int k);

  // top k elements per row
+ // note: the resulting top k indices are in no particular order
  GGML_API struct ggml_tensor * ggml_top_k(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  int k);

+ GGML_API struct ggml_tensor * ggml_arange(
+ struct ggml_context * ctx,
+ float start,
+ float stop,
+ float step);
+
  #define GGML_KQ_MASK_PAD 64

  // q: [n_embd_k, n_batch, n_head, ne3 ]
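Beyond the header reshuffle, this hunk changes a contract: `ggml_top_k` now backs the dedicated `GGML_OP_TOP_K` op and no longer guarantees sorted indices (presumably to allow cheaper selection kernels), while the new `ggml_argsort_top_k` keeps the old ordered, argsort-based behavior. A plain-C++ illustration of the difference between the two contracts (illustrative only, not the ggml implementation):

```cpp
#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Ordered variant: full argsort, then keep the first k indices,
// mirroring the "argsort + view" contract of ggml_argsort_top_k.
std::vector<int> argsort_top_k(const std::vector<float> & x, int k) {
    std::vector<int> idx(x.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::stable_sort(idx.begin(), idx.end(),
                     [&](int a, int b) { return x[a] > x[b]; });
    idx.resize(k);
    return idx; // indices sorted by descending value
}

// Unordered variant: select the k largest without sorting them,
// matching ggml_top_k's "no particular order" note; nth_element
// does less work than a full sort.
std::vector<int> top_k_unordered(const std::vector<float> & x, int k) {
    std::vector<int> idx(x.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::nth_element(idx.begin(), idx.begin() + k, idx.end(),
                     [&](int a, int b) { return x[a] > x[b]; });
    idx.resize(k);
    return idx; // the k largest indices, in unspecified order
}

int main() {
    const std::vector<float> logits = {0.1f, 2.5f, -1.0f, 3.0f, 0.7f};
    for (int i : argsort_top_k(logits, 3))   std::printf("%d ", i); // 3 1 4
    std::printf("\n");
    for (int i : top_k_unordered(logits, 3)) std::printf("%d ", i); // {1, 3, 4} in some order
    std::printf("\n");
    return 0;
}
```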
@@ -328,6 +328,14 @@ function(ggml_add_cpu_backend_variant tag_name)
  set(GGML_INTERNAL_${feat} OFF)
  endforeach()

+ foreach (feat ${ARGN})
+ set(GGML_INTERNAL_${feat} ON)
+ endforeach()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
+ foreach (feat RVV)
+ set(GGML_INTERNAL_${feat} OFF)
+ endforeach()
+
  foreach (feat ${ARGN})
  set(GGML_INTERNAL_${feat} ON)
  endforeach()
@@ -402,6 +410,13 @@ if (GGML_CPU_ALL_VARIANTS)
  else()
  message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
  endif()
+ elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
+ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
+ ggml_add_cpu_backend_variant(riscv64_0)
+ ggml_add_cpu_backend_variant(riscv64_v RVV)
+ else()
+ message(FATAL_ERROR "Unsupported RISC-V target OS: ${CMAKE_SYSTEM_NAME}")
+ endif()
  else()
  message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
  endif()
@@ -224,7 +224,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)

  include(CheckCXXSourceCompiles)
  set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
- set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS}")
+ string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
+ set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
  foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
  set(ARM_FEATURE "HAVE_${feature}")
  check_cxx_source_compiles(
@@ -452,22 +453,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
  ggml-cpu/spacemit/ime_kernels.h
  )
  endif()
- set(MARCH_STR "rv64gc")
- if (GGML_RV_ZFH)
- string(APPEND MARCH_STR "_zfh")
- endif()
- if (GGML_XTHEADVECTOR)
- string(APPEND MARCH_STR "_xtheadvector")
- elseif (GGML_RVV)
- string(APPEND MARCH_STR "_v")
- if (GGML_RV_ZVFH)
- string(APPEND MARCH_STR "_zvfh")
+ if(NOT GGML_CPU_ALL_VARIANTS)
+ set(MARCH_STR "rv64gc")
+ if (GGML_RV_ZFH)
+ string(APPEND MARCH_STR "_zfh")
  endif()
+ if (GGML_XTHEADVECTOR)
+ string(APPEND MARCH_STR "_xtheadvector")
+ elseif (GGML_RVV)
+ string(APPEND MARCH_STR "_v")
+ if (GGML_RV_ZVFH)
+ string(APPEND MARCH_STR "_zvfh")
+ endif()
+ endif()
+ if (GGML_RV_ZICBOP)
+ string(APPEND MARCH_STR "_zicbop")
+ endif()
+ list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
+ else()
+ # Begin with the lowest baseline
+ set(ARCH_DEFINITIONS "")
+
+ if (GGML_INTERNAL_RVV)
+ message(STATUS "RVV enabled")
+ list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
+ list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
+ endif()
+
+ ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
  endif()
- if (GGML_RV_ZICBOP)
- string(APPEND MARCH_STR "_zicbop")
- endif()
- list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
  elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
  message(STATUS "s390x detected")
  list(APPEND GGML_CPU_SOURCES