@fugood/llama.node 1.3.7 → 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +5 -5
- package/src/llama.cpp/common/arg.cpp +26 -1
- package/src/llama.cpp/common/common.cpp +55 -0
- package/src/llama.cpp/common/common.h +18 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +5 -4
- package/src/llama.cpp/ggml/include/ggml.h +12 -4
- package/src/llama.cpp/ggml/src/CMakeLists.txt +15 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +29 -15
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +388 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/cpu-feats.cpp +35 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +9 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +69 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +9 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +84 -85
- package/src/llama.cpp/include/llama.h +18 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +47 -13
- package/src/llama.cpp/src/llama-arch.h +13 -0
- package/src/llama.cpp/src/llama-context.cpp +1 -1
- package/src/llama.cpp/src/llama-graph.cpp +3 -3
- package/src/llama.cpp/src/llama-model.cpp +39 -1
- package/src/llama.cpp/src/models/models.h +4 -0
- package/src/llama.cpp/src/models/rnd1.cpp +126 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.3.
|
|
4
|
+
"version": "1.3.8",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,20 +72,20 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-linux-x64": "1.3.
|
|
76
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.3.
|
|
77
|
-
"@fugood/node-llama-linux-x64-cuda": "1.3.
|
|
78
|
-
"@fugood/node-llama-linux-arm64-snapdragon": "1.3.
|
|
79
|
-
"@fugood/node-llama-linux-arm64": "1.3.
|
|
80
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.3.
|
|
81
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.3.
|
|
82
|
-
"@fugood/node-llama-win32-x64": "1.3.
|
|
83
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.3.
|
|
84
|
-
"@fugood/node-llama-win32-x64-cuda": "1.3.
|
|
85
|
-
"@fugood/node-llama-win32-arm64": "1.3.
|
|
86
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.3.
|
|
87
|
-
"@fugood/node-llama-darwin-x64": "1.3.
|
|
88
|
-
"@fugood/node-llama-darwin-arm64": "1.3.
|
|
75
|
+
"@fugood/node-llama-linux-x64": "1.3.8",
|
|
76
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.3.8",
|
|
77
|
+
"@fugood/node-llama-linux-x64-cuda": "1.3.8",
|
|
78
|
+
"@fugood/node-llama-linux-arm64-snapdragon": "1.3.8",
|
|
79
|
+
"@fugood/node-llama-linux-arm64": "1.3.8",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.3.8",
|
|
81
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.3.8",
|
|
82
|
+
"@fugood/node-llama-win32-x64": "1.3.8",
|
|
83
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.3.8",
|
|
84
|
+
"@fugood/node-llama-win32-x64-cuda": "1.3.8",
|
|
85
|
+
"@fugood/node-llama-win32-arm64": "1.3.8",
|
|
86
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.3.8",
|
|
87
|
+
"@fugood/node-llama-darwin-x64": "1.3.8",
|
|
88
|
+
"@fugood/node-llama-darwin-arm64": "1.3.8"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
91
91
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -85,10 +85,10 @@ index 754c411e2..71241a6cc 100644
|
|
|
85
85
|
struct common_chat_tool_call {
|
|
86
86
|
std::string name;
|
|
87
87
|
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
|
|
88
|
-
index
|
|
88
|
+
index 0d7fd9a93..6bf3cc7ab 100644
|
|
89
89
|
--- a/src/llama.cpp/common/common.cpp
|
|
90
90
|
+++ b/src/llama.cpp/common/common.cpp
|
|
91
|
-
@@ -
|
|
91
|
+
@@ -1217,6 +1217,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
92
92
|
mparams.n_gpu_layers = params.n_gpu_layers;
|
|
93
93
|
}
|
|
94
94
|
|
|
@@ -97,10 +97,10 @@ index f3cc55247..65398844f 100644
|
|
|
97
97
|
mparams.split_mode = params.split_mode;
|
|
98
98
|
mparams.tensor_split = params.tensor_split;
|
|
99
99
|
diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
|
|
100
|
-
index
|
|
100
|
+
index 2f23d0baa..e4e6c795e 100644
|
|
101
101
|
--- a/src/llama.cpp/common/common.h
|
|
102
102
|
+++ b/src/llama.cpp/common/common.h
|
|
103
|
-
@@ -
|
|
103
|
+
@@ -299,6 +299,7 @@ struct lr_opt {
|
|
104
104
|
struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
|
|
105
105
|
|
|
106
106
|
struct common_params {
|
|
@@ -109,7 +109,7 @@ index de5b404dd..d30d252c9 100644
|
|
|
109
109
|
int32_t n_ctx = 4096; // context size
|
|
110
110
|
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
|
111
111
|
diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
|
|
112
|
-
index
|
|
112
|
+
index 7e53a57b7..a328d4db4 100644
|
|
113
113
|
--- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
|
|
114
114
|
+++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
|
|
115
115
|
@@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
@@ -694,6 +694,12 @@ static bool is_autoy(const std::string & value) {
|
|
|
694
694
|
}
|
|
695
695
|
|
|
696
696
|
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
|
697
|
+
// default values specific to example
|
|
698
|
+
// note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
|
|
699
|
+
if (ex == LLAMA_EXAMPLE_SERVER) {
|
|
700
|
+
params.use_jinja = true;
|
|
701
|
+
}
|
|
702
|
+
|
|
697
703
|
// load dynamic backends
|
|
698
704
|
ggml_backend_load_all();
|
|
699
705
|
|
|
@@ -1232,6 +1238,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1232
1238
|
[](common_params & params, const std::string & value) {
|
|
1233
1239
|
const auto sampler_names = string_split<std::string>(value, ';');
|
|
1234
1240
|
params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
|
|
1241
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS;
|
|
1235
1242
|
}
|
|
1236
1243
|
).set_sparam());
|
|
1237
1244
|
add_opt(common_arg(
|
|
@@ -1261,6 +1268,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1261
1268
|
[](common_params & params, const std::string & value) {
|
|
1262
1269
|
params.sampling.temp = std::stof(value);
|
|
1263
1270
|
params.sampling.temp = std::max(params.sampling.temp, 0.0f);
|
|
1271
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP;
|
|
1264
1272
|
}
|
|
1265
1273
|
).set_sparam());
|
|
1266
1274
|
add_opt(common_arg(
|
|
@@ -1268,6 +1276,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1268
1276
|
string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k),
|
|
1269
1277
|
[](common_params & params, int value) {
|
|
1270
1278
|
params.sampling.top_k = value;
|
|
1279
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K;
|
|
1271
1280
|
}
|
|
1272
1281
|
).set_sparam());
|
|
1273
1282
|
add_opt(common_arg(
|
|
@@ -1275,6 +1284,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1275
1284
|
string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p),
|
|
1276
1285
|
[](common_params & params, const std::string & value) {
|
|
1277
1286
|
params.sampling.top_p = std::stof(value);
|
|
1287
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
|
|
1278
1288
|
}
|
|
1279
1289
|
).set_sparam());
|
|
1280
1290
|
add_opt(common_arg(
|
|
@@ -1282,6 +1292,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1282
1292
|
string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p),
|
|
1283
1293
|
[](common_params & params, const std::string & value) {
|
|
1284
1294
|
params.sampling.min_p = std::stof(value);
|
|
1295
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
|
|
1285
1296
|
}
|
|
1286
1297
|
).set_sparam());
|
|
1287
1298
|
add_opt(common_arg(
|
|
@@ -1296,6 +1307,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1296
1307
|
string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
|
|
1297
1308
|
[](common_params & params, const std::string & value) {
|
|
1298
1309
|
params.sampling.xtc_probability = std::stof(value);
|
|
1310
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
|
|
1299
1311
|
}
|
|
1300
1312
|
).set_sparam());
|
|
1301
1313
|
add_opt(common_arg(
|
|
@@ -1303,6 +1315,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1303
1315
|
string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
|
|
1304
1316
|
[](common_params & params, const std::string & value) {
|
|
1305
1317
|
params.sampling.xtc_threshold = std::stof(value);
|
|
1318
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
|
|
1306
1319
|
}
|
|
1307
1320
|
).set_sparam());
|
|
1308
1321
|
add_opt(common_arg(
|
|
@@ -1321,6 +1334,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1321
1334
|
}
|
|
1322
1335
|
params.sampling.penalty_last_n = value;
|
|
1323
1336
|
params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
|
|
1337
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N;
|
|
1324
1338
|
}
|
|
1325
1339
|
).set_sparam());
|
|
1326
1340
|
add_opt(common_arg(
|
|
@@ -1328,6 +1342,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1328
1342
|
string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
|
|
1329
1343
|
[](common_params & params, const std::string & value) {
|
|
1330
1344
|
params.sampling.penalty_repeat = std::stof(value);
|
|
1345
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
|
|
1331
1346
|
}
|
|
1332
1347
|
).set_sparam());
|
|
1333
1348
|
add_opt(common_arg(
|
|
@@ -1425,6 +1440,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1425
1440
|
"(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat),
|
|
1426
1441
|
[](common_params & params, int value) {
|
|
1427
1442
|
params.sampling.mirostat = value;
|
|
1443
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT;
|
|
1428
1444
|
}
|
|
1429
1445
|
).set_sparam());
|
|
1430
1446
|
add_opt(common_arg(
|
|
@@ -1432,6 +1448,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1432
1448
|
string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta),
|
|
1433
1449
|
[](common_params & params, const std::string & value) {
|
|
1434
1450
|
params.sampling.mirostat_eta = std::stof(value);
|
|
1451
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
|
|
1435
1452
|
}
|
|
1436
1453
|
).set_sparam());
|
|
1437
1454
|
add_opt(common_arg(
|
|
@@ -1439,6 +1456,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1439
1456
|
string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau),
|
|
1440
1457
|
[](common_params & params, const std::string & value) {
|
|
1441
1458
|
params.sampling.mirostat_tau = std::stof(value);
|
|
1459
|
+
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
|
|
1442
1460
|
}
|
|
1443
1461
|
).set_sparam());
|
|
1444
1462
|
add_opt(common_arg(
|
|
@@ -2476,11 +2494,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
2476
2494
|
).set_examples({LLAMA_EXAMPLE_SERVER}));
|
|
2477
2495
|
add_opt(common_arg(
|
|
2478
2496
|
{"--jinja"},
|
|
2479
|
-
"use jinja template for chat (default:
|
|
2497
|
+
string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
|
|
2480
2498
|
[](common_params & params) {
|
|
2481
2499
|
params.use_jinja = true;
|
|
2482
2500
|
}
|
|
2483
2501
|
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
|
|
2502
|
+
add_opt(common_arg(
|
|
2503
|
+
{"--no-jinja"},
|
|
2504
|
+
string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
|
|
2505
|
+
[](common_params & params) {
|
|
2506
|
+
params.use_jinja = false;
|
|
2507
|
+
}
|
|
2508
|
+
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_NO_JINJA"));
|
|
2484
2509
|
add_opt(common_arg(
|
|
2485
2510
|
{"--reasoning-format"}, "FORMAT",
|
|
2486
2511
|
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
#include "common.h"
|
|
9
9
|
#include "log.h"
|
|
10
10
|
#include "llama.h"
|
|
11
|
+
#include "sampling.h"
|
|
11
12
|
|
|
12
13
|
#include <algorithm>
|
|
13
14
|
#include <cinttypes>
|
|
@@ -949,6 +950,58 @@ std::vector<common_file_info> fs_list_files(const std::string & path) {
|
|
|
949
950
|
// Model utils
|
|
950
951
|
//
|
|
951
952
|
|
|
953
|
+
static inline void common_init_sampler_from_model(
|
|
954
|
+
const llama_model * model,
|
|
955
|
+
common_params_sampling & sparams) {
|
|
956
|
+
|
|
957
|
+
const uint64_t config = sparams.user_sampling_config;
|
|
958
|
+
|
|
959
|
+
auto get_int32 = [&](const char * key, int32_t & dst, uint64_t user_config) {
|
|
960
|
+
if (config & user_config) return;
|
|
961
|
+
|
|
962
|
+
char buf[64] = {0};
|
|
963
|
+
if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
|
|
964
|
+
char * end = nullptr;
|
|
965
|
+
int32_t v = strtol(buf, &end, 10);
|
|
966
|
+
if (end && end != buf) dst = v;
|
|
967
|
+
}
|
|
968
|
+
};
|
|
969
|
+
|
|
970
|
+
auto get_float = [&](const char * key, float & dst, uint64_t user_config) {
|
|
971
|
+
if (config & user_config) return;
|
|
972
|
+
|
|
973
|
+
char buf[128] = {0};
|
|
974
|
+
if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
|
|
975
|
+
char * end = nullptr;
|
|
976
|
+
float v = strtof(buf, &end);
|
|
977
|
+
if (end && end != buf) dst = v;
|
|
978
|
+
}
|
|
979
|
+
};
|
|
980
|
+
|
|
981
|
+
// Sampling sequence
|
|
982
|
+
if (!(config & common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS)) {
|
|
983
|
+
char buf[512] = {0};
|
|
984
|
+
if (llama_model_meta_val_str(model, llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE), buf, sizeof(buf)) > 0) {
|
|
985
|
+
const std::vector<std::string> sampler_names = string_split<std::string>(std::string(buf), ';');
|
|
986
|
+
if (!sampler_names.empty()) {
|
|
987
|
+
sparams.samplers = common_sampler_types_from_names(sampler_names, true);
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
}
|
|
991
|
+
|
|
992
|
+
get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_K), sparams.top_k, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K);
|
|
993
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_P), sparams.top_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P);
|
|
994
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIN_P), sparams.min_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P);
|
|
995
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY), sparams.xtc_probability, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY);
|
|
996
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD), sparams.xtc_threshold, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD);
|
|
997
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TEMP), sparams.temp, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP);
|
|
998
|
+
get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N), sparams.penalty_last_n, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N);
|
|
999
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT), sparams.penalty_repeat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT);
|
|
1000
|
+
get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT), sparams.mirostat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT);
|
|
1001
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU), sparams.mirostat_tau, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU);
|
|
1002
|
+
get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA), sparams.mirostat_eta, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA);
|
|
1003
|
+
}
|
|
1004
|
+
|
|
952
1005
|
struct common_init_result common_init_from_params(common_params & params) {
|
|
953
1006
|
common_init_result iparams;
|
|
954
1007
|
auto mparams = common_model_params_to_llama(params);
|
|
@@ -960,6 +1013,8 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
|
960
1013
|
return iparams;
|
|
961
1014
|
}
|
|
962
1015
|
|
|
1016
|
+
common_init_sampler_from_model(model, params.sampling);
|
|
1017
|
+
|
|
963
1018
|
const llama_vocab * vocab = llama_model_get_vocab(model);
|
|
964
1019
|
|
|
965
1020
|
auto cparams = common_context_params_to_llama(params);
|
|
@@ -140,6 +140,22 @@ struct common_grammar_trigger {
|
|
|
140
140
|
llama_token token = LLAMA_TOKEN_NULL;
|
|
141
141
|
};
|
|
142
142
|
|
|
143
|
+
enum common_params_sampling_config : uint64_t {
|
|
144
|
+
COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS = 1 << 0,
|
|
145
|
+
COMMON_PARAMS_SAMPLING_CONFIG_TOP_K = 1 << 1,
|
|
146
|
+
COMMON_PARAMS_SAMPLING_CONFIG_TOP_P = 1 << 2,
|
|
147
|
+
COMMON_PARAMS_SAMPLING_CONFIG_MIN_P = 1 << 3,
|
|
148
|
+
COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY = 1 << 4,
|
|
149
|
+
COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD = 1 << 5,
|
|
150
|
+
COMMON_PARAMS_SAMPLING_CONFIG_TEMP = 1 << 6,
|
|
151
|
+
COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N = 1 << 7,
|
|
152
|
+
COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT = 1 << 8,
|
|
153
|
+
COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT = 1 << 9,
|
|
154
|
+
COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU = 1 << 10,
|
|
155
|
+
COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA = 1 << 11,
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
|
|
143
159
|
// sampling parameters
|
|
144
160
|
struct common_params_sampling {
|
|
145
161
|
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
|
@@ -172,6 +188,8 @@ struct common_params_sampling {
|
|
|
172
188
|
bool no_perf = false; // disable performance metrics
|
|
173
189
|
bool timing_per_token = false;
|
|
174
190
|
|
|
191
|
+
uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers
|
|
192
|
+
|
|
175
193
|
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
|
|
176
194
|
|
|
177
195
|
|
|
@@ -25,16 +25,17 @@ if(GIT_EXE)
|
|
|
25
25
|
)
|
|
26
26
|
endif()
|
|
27
27
|
|
|
28
|
-
# Build the version string with optional dirty flag
|
|
29
28
|
set(GGML_VERSION "${GGML_VERSION_BASE}")
|
|
30
|
-
if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
|
|
31
|
-
set(GGML_VERSION "${GGML_VERSION}-dirty")
|
|
32
|
-
endif()
|
|
33
29
|
|
|
34
30
|
if(NOT GGML_BUILD_COMMIT)
|
|
35
31
|
set(GGML_BUILD_COMMIT "unknown")
|
|
36
32
|
endif()
|
|
37
33
|
|
|
34
|
+
# Build the commit string with optional dirty flag
|
|
35
|
+
if(DEFINED GGML_GIT_DIRTY AND GGML_GIT_DIRTY EQUAL 1)
|
|
36
|
+
set(GGML_BUILD_COMMIT "${GGML_BUILD_COMMIT}-dirty")
|
|
37
|
+
endif()
|
|
38
|
+
|
|
38
39
|
include(CheckIncludeFileCXX)
|
|
39
40
|
|
|
40
41
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
|
@@ -530,6 +530,7 @@ extern "C" {
|
|
|
530
530
|
GGML_OP_ARANGE,
|
|
531
531
|
GGML_OP_TIMESTEP_EMBEDDING,
|
|
532
532
|
GGML_OP_ARGSORT,
|
|
533
|
+
GGML_OP_TOP_K,
|
|
533
534
|
GGML_OP_LEAKY_RELU,
|
|
534
535
|
GGML_OP_TRI,
|
|
535
536
|
GGML_OP_FILL,
|
|
@@ -2258,18 +2259,25 @@ extern "C" {
|
|
|
2258
2259
|
struct ggml_tensor * a,
|
|
2259
2260
|
enum ggml_sort_order order);
|
|
2260
2261
|
|
|
2261
|
-
|
|
2262
|
+
// similar to ggml_top_k but implemented as `argsort` + `view`
|
|
2263
|
+
GGML_API struct ggml_tensor * ggml_argsort_top_k(
|
|
2262
2264
|
struct ggml_context * ctx,
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
float step);
|
|
2265
|
+
struct ggml_tensor * a,
|
|
2266
|
+
int k);
|
|
2266
2267
|
|
|
2267
2268
|
// top k elements per row
|
|
2269
|
+
// note: the resulting top k indices are in no particular order
|
|
2268
2270
|
GGML_API struct ggml_tensor * ggml_top_k(
|
|
2269
2271
|
struct ggml_context * ctx,
|
|
2270
2272
|
struct ggml_tensor * a,
|
|
2271
2273
|
int k);
|
|
2272
2274
|
|
|
2275
|
+
GGML_API struct ggml_tensor * ggml_arange(
|
|
2276
|
+
struct ggml_context * ctx,
|
|
2277
|
+
float start,
|
|
2278
|
+
float stop,
|
|
2279
|
+
float step);
|
|
2280
|
+
|
|
2273
2281
|
#define GGML_KQ_MASK_PAD 64
|
|
2274
2282
|
|
|
2275
2283
|
// q: [n_embd_k, n_batch, n_head, ne3 ]
|
|
@@ -328,6 +328,14 @@ function(ggml_add_cpu_backend_variant tag_name)
|
|
|
328
328
|
set(GGML_INTERNAL_${feat} OFF)
|
|
329
329
|
endforeach()
|
|
330
330
|
|
|
331
|
+
foreach (feat ${ARGN})
|
|
332
|
+
set(GGML_INTERNAL_${feat} ON)
|
|
333
|
+
endforeach()
|
|
334
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
335
|
+
foreach (feat RVV)
|
|
336
|
+
set(GGML_INTERNAL_${feat} OFF)
|
|
337
|
+
endforeach()
|
|
338
|
+
|
|
331
339
|
foreach (feat ${ARGN})
|
|
332
340
|
set(GGML_INTERNAL_${feat} ON)
|
|
333
341
|
endforeach()
|
|
@@ -402,6 +410,13 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
402
410
|
else()
|
|
403
411
|
message(FATAL_ERROR "Unsupported s390x target OS: ${CMAKE_SYSTEM_NAME}")
|
|
404
412
|
endif()
|
|
413
|
+
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
|
|
414
|
+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
|
415
|
+
ggml_add_cpu_backend_variant(riscv64_0)
|
|
416
|
+
ggml_add_cpu_backend_variant(riscv64_v RVV)
|
|
417
|
+
else()
|
|
418
|
+
message(FATAL_ERROR "Unsupported RISC-V target OS: ${CMAKE_SYSTEM_NAME}")
|
|
419
|
+
endif()
|
|
405
420
|
else()
|
|
406
421
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
|
|
407
422
|
endif()
|
|
@@ -224,7 +224,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
224
224
|
|
|
225
225
|
include(CheckCXXSourceCompiles)
|
|
226
226
|
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
|
|
227
|
-
|
|
227
|
+
string(REPLACE ";" " " ARCH_FLAGS_STR "${ARCH_FLAGS}")
|
|
228
|
+
set(CMAKE_REQUIRED_FLAGS "${ARCH_FLAGS_STR}")
|
|
228
229
|
foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
|
|
229
230
|
set(ARM_FEATURE "HAVE_${feature}")
|
|
230
231
|
check_cxx_source_compiles(
|
|
@@ -452,22 +453,35 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
|
|
|
452
453
|
ggml-cpu/spacemit/ime_kernels.h
|
|
453
454
|
)
|
|
454
455
|
endif()
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
if (GGML_XTHEADVECTOR)
|
|
460
|
-
string(APPEND MARCH_STR "_xtheadvector")
|
|
461
|
-
elseif (GGML_RVV)
|
|
462
|
-
string(APPEND MARCH_STR "_v")
|
|
463
|
-
if (GGML_RV_ZVFH)
|
|
464
|
-
string(APPEND MARCH_STR "_zvfh")
|
|
456
|
+
if(NOT GGML_CPU_ALL_VARIANTS)
|
|
457
|
+
set(MARCH_STR "rv64gc")
|
|
458
|
+
if (GGML_RV_ZFH)
|
|
459
|
+
string(APPEND MARCH_STR "_zfh")
|
|
465
460
|
endif()
|
|
461
|
+
if (GGML_XTHEADVECTOR)
|
|
462
|
+
string(APPEND MARCH_STR "_xtheadvector")
|
|
463
|
+
elseif (GGML_RVV)
|
|
464
|
+
string(APPEND MARCH_STR "_v")
|
|
465
|
+
if (GGML_RV_ZVFH)
|
|
466
|
+
string(APPEND MARCH_STR "_zvfh")
|
|
467
|
+
endif()
|
|
468
|
+
endif()
|
|
469
|
+
if (GGML_RV_ZICBOP)
|
|
470
|
+
string(APPEND MARCH_STR "_zicbop")
|
|
471
|
+
endif()
|
|
472
|
+
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
|
|
473
|
+
else()
|
|
474
|
+
# Begin with the lowest baseline
|
|
475
|
+
set(ARCH_DEFINITIONS "")
|
|
476
|
+
|
|
477
|
+
if (GGML_INTERNAL_RVV)
|
|
478
|
+
message(STATUS "RVV enabled")
|
|
479
|
+
list(APPEND ARCH_DEFINITIONS GGML_USE_RVV)
|
|
480
|
+
list(APPEND ARCH_FLAGS -march=rv64gc_v -mabi=lp64d)
|
|
481
|
+
endif()
|
|
482
|
+
|
|
483
|
+
ggml_add_cpu_backend_features(${GGML_CPU_NAME} riscv ${ARCH_DEFINITIONS})
|
|
466
484
|
endif()
|
|
467
|
-
if (GGML_RV_ZICBOP)
|
|
468
|
-
string(APPEND MARCH_STR "_zicbop")
|
|
469
|
-
endif()
|
|
470
|
-
list(APPEND ARCH_FLAGS "-march=${MARCH_STR}" -mabi=lp64d)
|
|
471
485
|
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
|
|
472
486
|
message(STATUS "s390x detected")
|
|
473
487
|
list(APPEND GGML_CPU_SOURCES
|