@fugood/llama.node 1.6.0-rc.2 → 1.6.0-rc.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +15 -15
- package/src/llama.cpp/common/arg.cpp +25 -21
- package/src/llama.cpp/common/chat-parser.cpp +2 -2
- package/src/llama.cpp/common/chat.cpp +159 -139
- package/src/llama.cpp/common/chat.h +16 -9
- package/src/llama.cpp/common/jinja/caps.cpp +48 -5
- package/src/llama.cpp/common/jinja/caps.h +5 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +539 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +26 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +6 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +345 -15
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +21 -4
- package/src/llama.cpp/include/llama.h +3 -2
- package/src/llama.cpp/src/llama-context.cpp +6 -5
- package/src/llama.cpp/src/llama-graph.cpp +159 -18
- package/src/llama.cpp/src/llama-graph.h +54 -3
- package/src/llama.cpp/src/llama-hparams.cpp +17 -2
- package/src/llama.cpp/src/llama-hparams.h +10 -4
- package/src/llama.cpp/src/llama-kv-cache.cpp +34 -10
- package/src/llama.cpp/src/llama-model-saver.cpp +2 -2
- package/src/llama.cpp/src/llama-model.cpp +14 -16
- package/src/llama.cpp/src/llama-quant.cpp +53 -56
- package/src/llama.cpp/src/llama.cpp +50 -16
- package/src/llama.cpp/src/models/deepseek2.cpp +14 -14
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +2 -2
- package/src/llama.cpp/src/models/minicpm3.cpp +1 -0
- package/src/llama.cpp/src/models/plm.cpp +1 -0
- package/src/llama.cpp/src/models/qwen3vl-moe.cpp +5 -14
- package/src/llama.cpp/src/models/qwen3vl.cpp +5 -14
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.6.0-rc.2",
|
|
4
|
+
"version": "1.6.0-rc.4",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,20 +72,20 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-darwin-arm64": "1.6.0-rc.2",
|
|
76
|
-
"@fugood/node-llama-darwin-x64": "1.6.0-rc.2",
|
|
77
|
-
"@fugood/node-llama-linux-arm64": "1.6.0-rc.2",
|
|
78
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.2",
|
|
79
|
-
"@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.2",
|
|
80
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.2",
|
|
81
|
-
"@fugood/node-llama-linux-x64": "1.6.0-rc.2",
|
|
82
|
-
"@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.2",
|
|
83
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.2",
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.6.0-rc.2",
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.2",
|
|
86
|
-
"@fugood/node-llama-win32-x64": "1.6.0-rc.2",
|
|
87
|
-
"@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.2",
|
|
88
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.2"
|
|
75
|
+
"@fugood/node-llama-darwin-arm64": "1.6.0-rc.4",
|
|
76
|
+
"@fugood/node-llama-darwin-x64": "1.6.0-rc.4",
|
|
77
|
+
"@fugood/node-llama-linux-arm64": "1.6.0-rc.4",
|
|
78
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.4",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.4",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.4",
|
|
81
|
+
"@fugood/node-llama-linux-x64": "1.6.0-rc.4",
|
|
82
|
+
"@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.4",
|
|
83
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.4",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.6.0-rc.4",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.4",
|
|
86
|
+
"@fugood/node-llama-win32-x64": "1.6.0-rc.4",
|
|
87
|
+
"@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.4",
|
|
88
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.4"
|
|
89
89
|
},
|
|
90
90
|
"devDependencies": {
|
|
91
91
|
"@babel/preset-env": "^7.24.4",
|
package/scripts/llama.cpp.patch
CHANGED
|
@@ -16,7 +16,7 @@ index ae02c0bd7..f74d8bb26 100644
|
|
|
16
16
|
+
|
|
17
17
|
+target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
|
|
18
18
|
diff --git a/src/llama.cpp/common/chat-parser.cpp b/src/llama.cpp/common/chat-parser.cpp
|
|
19
|
-
index
|
|
19
|
+
index 29819e48d..2b6402489 100644
|
|
20
20
|
--- a/src/llama.cpp/common/chat-parser.cpp
|
|
21
21
|
+++ b/src/llama.cpp/common/chat-parser.cpp
|
|
22
22
|
@@ -1515,6 +1515,39 @@ static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
|
|
@@ -83,10 +83,10 @@ index 1bcba9cd8..b7cd68734 100644
|
|
|
83
83
|
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
|
|
84
84
|
int count = 0;
|
|
85
85
|
diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
|
|
86
|
-
index
|
|
86
|
+
index eeb38ad06..363119f83 100644
|
|
87
87
|
--- a/src/llama.cpp/common/chat.cpp
|
|
88
88
|
+++ b/src/llama.cpp/common/chat.cpp
|
|
89
|
-
@@ -
|
|
89
|
+
@@ -574,6 +574,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
|
|
90
90
|
return tmpls->template_default->source();
|
|
91
91
|
}
|
|
92
92
|
|
|
@@ -124,7 +124,7 @@ index b29544dac..52bfa0e20 100644
|
|
|
124
124
|
common_chat_templates_ptr common_chat_templates_init(
|
|
125
125
|
const struct llama_model * model,
|
|
126
126
|
const std::string & chat_template_override,
|
|
127
|
-
@@ -
|
|
127
|
+
@@ -699,6 +730,7 @@ const char * common_chat_format_name(common_chat_format format) {
|
|
128
128
|
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
|
|
129
129
|
case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
|
|
130
130
|
case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
|
|
@@ -132,7 +132,7 @@ index b29544dac..52bfa0e20 100644
|
|
|
132
132
|
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
|
|
133
133
|
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
|
|
134
134
|
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
|
|
135
|
-
@@ -
|
|
135
|
+
@@ -790,8 +822,9 @@ static std::string apply(
|
|
136
136
|
if (inputs.add_generation_prompt) {
|
|
137
137
|
inp["add_generation_prompt"] = true;
|
|
138
138
|
}
|
|
@@ -144,7 +144,7 @@ index b29544dac..52bfa0e20 100644
|
|
|
144
144
|
}
|
|
145
145
|
|
|
146
146
|
jinja::global_from_json(ctx, inp, inputs.mark_input);
|
|
147
|
-
@@ -
|
|
147
|
+
@@ -2695,6 +2728,43 @@ static common_chat_params common_chat_params_init_translate_gemma(const common_c
|
|
148
148
|
return data;
|
|
149
149
|
}
|
|
150
150
|
|
|
@@ -185,10 +185,10 @@ index b29544dac..52bfa0e20 100644
|
|
|
185
185
|
+ return data;
|
|
186
186
|
+}
|
|
187
187
|
+
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
@@ -
|
|
188
|
+
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
189
|
+
common_chat_params data;
|
|
190
|
+
data.prompt = apply(tmpl, inputs);
|
|
191
|
+
@@ -3043,6 +3113,11 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|
192
192
|
return common_chat_params_init_apriel_1_5(tmpl, params);
|
|
193
193
|
}
|
|
194
194
|
|
|
@@ -201,10 +201,10 @@ index b29544dac..52bfa0e20 100644
|
|
|
201
201
|
// TODO: support that mix in handlers below.
|
|
202
202
|
if ((params.tools.is_array() && params.json_schema.is_object())) {
|
|
203
203
|
diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
|
|
204
|
-
index
|
|
204
|
+
index 24aa4aab5..e02e22ae0 100644
|
|
205
205
|
--- a/src/llama.cpp/common/chat.h
|
|
206
206
|
+++ b/src/llama.cpp/common/chat.h
|
|
207
|
-
@@ -
|
|
207
|
+
@@ -133,6 +133,7 @@ enum common_chat_format {
|
|
208
208
|
COMMON_CHAT_FORMAT_XIAOMI_MIMO,
|
|
209
209
|
COMMON_CHAT_FORMAT_SOLAR_OPEN,
|
|
210
210
|
COMMON_CHAT_FORMAT_EXAONE_MOE,
|
|
@@ -212,7 +212,7 @@ index ac19348ec..bd6030de8 100644
|
|
|
212
212
|
|
|
213
213
|
// These are intended to be parsed by the PEG parser
|
|
214
214
|
COMMON_CHAT_FORMAT_PEG_SIMPLE,
|
|
215
|
-
@@ -
|
|
215
|
+
@@ -238,6 +239,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
|
|
216
216
|
|
|
217
217
|
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
|
|
218
218
|
|
|
@@ -231,8 +231,8 @@ index ac19348ec..bd6030de8 100644
|
|
|
231
231
|
+bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
|
|
232
232
|
+
|
|
233
233
|
// Parses a JSON array of messages in OpenAI's chat completion API format.
|
|
234
|
-
|
|
235
|
-
|
|
234
|
+
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
|
|
235
|
+
nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
|
|
236
236
|
diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
|
|
237
237
|
index 26250abb6..72ceddcc7 100644
|
|
238
238
|
--- a/src/llama.cpp/common/common.cpp
|
|
@@ -1231,6 +1231,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1231
1231
|
string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx),
|
|
1232
1232
|
[](common_params & params, int value) {
|
|
1233
1233
|
params.n_ctx = value;
|
|
1234
|
+
if (value == 0) {
|
|
1235
|
+
// disable context reduction in llama_params_fit if the user explicitly requests the full context size:
|
|
1236
|
+
params.fit_params_min_ctx = UINT32_MAX;
|
|
1237
|
+
}
|
|
1234
1238
|
}
|
|
1235
1239
|
).set_env("LLAMA_ARG_CTX_SIZE"));
|
|
1236
1240
|
add_opt(common_arg(
|
|
@@ -1573,7 +1577,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1573
1577
|
).set_sparam());
|
|
1574
1578
|
add_opt(common_arg(
|
|
1575
1579
|
{"--temp"}, "N",
|
|
1576
|
-
string_format("temperature (default: %.
|
|
1580
|
+
string_format("temperature (default: %.2f)", (double)params.sampling.temp),
|
|
1577
1581
|
[](common_params & params, const std::string & value) {
|
|
1578
1582
|
params.sampling.temp = std::stof(value);
|
|
1579
1583
|
params.sampling.temp = std::max(params.sampling.temp, 0.0f);
|
|
@@ -1590,7 +1594,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1590
1594
|
).set_sparam().set_env("LLAMA_ARG_TOP_K"));
|
|
1591
1595
|
add_opt(common_arg(
|
|
1592
1596
|
{"--top-p"}, "N",
|
|
1593
|
-
string_format("top-p sampling (default: %.
|
|
1597
|
+
string_format("top-p sampling (default: %.2f, 1.0 = disabled)", (double)params.sampling.top_p),
|
|
1594
1598
|
[](common_params & params, const std::string & value) {
|
|
1595
1599
|
params.sampling.top_p = std::stof(value);
|
|
1596
1600
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
|
|
@@ -1598,7 +1602,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1598
1602
|
).set_sparam());
|
|
1599
1603
|
add_opt(common_arg(
|
|
1600
1604
|
{"--min-p"}, "N",
|
|
1601
|
-
string_format("min-p sampling (default: %.
|
|
1605
|
+
string_format("min-p sampling (default: %.2f, 0.0 = disabled)", (double)params.sampling.min_p),
|
|
1602
1606
|
[](common_params & params, const std::string & value) {
|
|
1603
1607
|
params.sampling.min_p = std::stof(value);
|
|
1604
1608
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
|
|
@@ -1606,14 +1610,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1606
1610
|
).set_sparam());
|
|
1607
1611
|
add_opt(common_arg(
|
|
1608
1612
|
{"--top-nsigma"}, "N",
|
|
1609
|
-
string_format("top-n-sigma sampling (default: %.
|
|
1613
|
+
string_format("top-n-sigma sampling (default: %.2f, -1.0 = disabled)", params.sampling.top_n_sigma),
|
|
1610
1614
|
[](common_params & params, const std::string & value) {
|
|
1611
1615
|
params.sampling.top_n_sigma = std::stof(value);
|
|
1612
1616
|
}
|
|
1613
1617
|
).set_sparam());
|
|
1614
1618
|
add_opt(common_arg(
|
|
1615
1619
|
{"--xtc-probability"}, "N",
|
|
1616
|
-
string_format("xtc probability (default: %.
|
|
1620
|
+
string_format("xtc probability (default: %.2f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
|
|
1617
1621
|
[](common_params & params, const std::string & value) {
|
|
1618
1622
|
params.sampling.xtc_probability = std::stof(value);
|
|
1619
1623
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
|
|
@@ -1621,7 +1625,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1621
1625
|
).set_sparam());
|
|
1622
1626
|
add_opt(common_arg(
|
|
1623
1627
|
{"--xtc-threshold"}, "N",
|
|
1624
|
-
string_format("xtc threshold (default: %.
|
|
1628
|
+
string_format("xtc threshold (default: %.2f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
|
|
1625
1629
|
[](common_params & params, const std::string & value) {
|
|
1626
1630
|
params.sampling.xtc_threshold = std::stof(value);
|
|
1627
1631
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
|
|
@@ -1629,7 +1633,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1629
1633
|
).set_sparam());
|
|
1630
1634
|
add_opt(common_arg(
|
|
1631
1635
|
{"--typical"}, "N",
|
|
1632
|
-
string_format("locally typical sampling, parameter p (default: %.
|
|
1636
|
+
string_format("locally typical sampling, parameter p (default: %.2f, 1.0 = disabled)", (double)params.sampling.typ_p),
|
|
1633
1637
|
[](common_params & params, const std::string & value) {
|
|
1634
1638
|
params.sampling.typ_p = std::stof(value);
|
|
1635
1639
|
}
|
|
@@ -1648,7 +1652,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1648
1652
|
).set_sparam());
|
|
1649
1653
|
add_opt(common_arg(
|
|
1650
1654
|
{"--repeat-penalty"}, "N",
|
|
1651
|
-
string_format("penalize repeat sequence of tokens (default: %.
|
|
1655
|
+
string_format("penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
|
|
1652
1656
|
[](common_params & params, const std::string & value) {
|
|
1653
1657
|
params.sampling.penalty_repeat = std::stof(value);
|
|
1654
1658
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
|
|
@@ -1656,21 +1660,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1656
1660
|
).set_sparam());
|
|
1657
1661
|
add_opt(common_arg(
|
|
1658
1662
|
{"--presence-penalty"}, "N",
|
|
1659
|
-
string_format("repeat alpha presence penalty (default: %.
|
|
1663
|
+
string_format("repeat alpha presence penalty (default: %.2f, 0.0 = disabled)", (double)params.sampling.penalty_present),
|
|
1660
1664
|
[](common_params & params, const std::string & value) {
|
|
1661
1665
|
params.sampling.penalty_present = std::stof(value);
|
|
1662
1666
|
}
|
|
1663
1667
|
).set_sparam());
|
|
1664
1668
|
add_opt(common_arg(
|
|
1665
1669
|
{"--frequency-penalty"}, "N",
|
|
1666
|
-
string_format("repeat alpha frequency penalty (default: %.
|
|
1670
|
+
string_format("repeat alpha frequency penalty (default: %.2f, 0.0 = disabled)", (double)params.sampling.penalty_freq),
|
|
1667
1671
|
[](common_params & params, const std::string & value) {
|
|
1668
1672
|
params.sampling.penalty_freq = std::stof(value);
|
|
1669
1673
|
}
|
|
1670
1674
|
).set_sparam());
|
|
1671
1675
|
add_opt(common_arg(
|
|
1672
1676
|
{"--dry-multiplier"}, "N",
|
|
1673
|
-
string_format("set DRY sampling multiplier (default: %.
|
|
1677
|
+
string_format("set DRY sampling multiplier (default: %.2f, 0.0 = disabled)", (double)params.sampling.dry_multiplier),
|
|
1674
1678
|
[](common_params & params, const std::string & value) {
|
|
1675
1679
|
params.sampling.dry_multiplier = std::stof(value);
|
|
1676
1680
|
}
|
|
@@ -1751,14 +1755,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1751
1755
|
).set_sparam());
|
|
1752
1756
|
add_opt(common_arg(
|
|
1753
1757
|
{"--dynatemp-range"}, "N",
|
|
1754
|
-
string_format("dynamic temperature range (default: %.
|
|
1758
|
+
string_format("dynamic temperature range (default: %.2f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
|
|
1755
1759
|
[](common_params & params, const std::string & value) {
|
|
1756
1760
|
params.sampling.dynatemp_range = std::stof(value);
|
|
1757
1761
|
}
|
|
1758
1762
|
).set_sparam());
|
|
1759
1763
|
add_opt(common_arg(
|
|
1760
1764
|
{"--dynatemp-exp"}, "N",
|
|
1761
|
-
string_format("dynamic temperature exponent (default: %.
|
|
1765
|
+
string_format("dynamic temperature exponent (default: %.2f)", (double)params.sampling.dynatemp_exponent),
|
|
1762
1766
|
[](common_params & params, const std::string & value) {
|
|
1763
1767
|
params.sampling.dynatemp_exponent = std::stof(value);
|
|
1764
1768
|
}
|
|
@@ -1774,7 +1778,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1774
1778
|
).set_sparam());
|
|
1775
1779
|
add_opt(common_arg(
|
|
1776
1780
|
{"--mirostat-lr"}, "N",
|
|
1777
|
-
string_format("Mirostat learning rate, parameter eta (default: %.
|
|
1781
|
+
string_format("Mirostat learning rate, parameter eta (default: %.2f)", (double)params.sampling.mirostat_eta),
|
|
1778
1782
|
[](common_params & params, const std::string & value) {
|
|
1779
1783
|
params.sampling.mirostat_eta = std::stof(value);
|
|
1780
1784
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
|
|
@@ -1782,7 +1786,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1782
1786
|
).set_sparam());
|
|
1783
1787
|
add_opt(common_arg(
|
|
1784
1788
|
{"--mirostat-ent"}, "N",
|
|
1785
|
-
string_format("Mirostat target entropy, parameter tau (default: %.
|
|
1789
|
+
string_format("Mirostat target entropy, parameter tau (default: %.2f)", (double)params.sampling.mirostat_tau),
|
|
1786
1790
|
[](common_params & params, const std::string & value) {
|
|
1787
1791
|
params.sampling.mirostat_tau = std::stof(value);
|
|
1788
1792
|
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
|
|
@@ -1916,28 +1920,28 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1916
1920
|
).set_env("LLAMA_ARG_YARN_ORIG_CTX"));
|
|
1917
1921
|
add_opt(common_arg(
|
|
1918
1922
|
{"--yarn-ext-factor"}, "N",
|
|
1919
|
-
string_format("YaRN: extrapolation mix factor (default: %.
|
|
1923
|
+
string_format("YaRN: extrapolation mix factor (default: %.2f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
|
|
1920
1924
|
[](common_params & params, const std::string & value) {
|
|
1921
1925
|
params.yarn_ext_factor = std::stof(value);
|
|
1922
1926
|
}
|
|
1923
1927
|
).set_env("LLAMA_ARG_YARN_EXT_FACTOR"));
|
|
1924
1928
|
add_opt(common_arg(
|
|
1925
1929
|
{"--yarn-attn-factor"}, "N",
|
|
1926
|
-
string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.
|
|
1930
|
+
string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.2f)", (double)params.yarn_attn_factor),
|
|
1927
1931
|
[](common_params & params, const std::string & value) {
|
|
1928
1932
|
params.yarn_attn_factor = std::stof(value);
|
|
1929
1933
|
}
|
|
1930
1934
|
).set_env("LLAMA_ARG_YARN_ATTN_FACTOR"));
|
|
1931
1935
|
add_opt(common_arg(
|
|
1932
1936
|
{"--yarn-beta-slow"}, "N",
|
|
1933
|
-
string_format("YaRN: high correction dim or alpha (default: %.
|
|
1937
|
+
string_format("YaRN: high correction dim or alpha (default: %.2f)", (double)params.yarn_beta_slow),
|
|
1934
1938
|
[](common_params & params, const std::string & value) {
|
|
1935
1939
|
params.yarn_beta_slow = std::stof(value);
|
|
1936
1940
|
}
|
|
1937
1941
|
).set_env("LLAMA_ARG_YARN_BETA_SLOW"));
|
|
1938
1942
|
add_opt(common_arg(
|
|
1939
1943
|
{"--yarn-beta-fast"}, "N",
|
|
1940
|
-
string_format("YaRN: low correction dim or beta (default: %.
|
|
1944
|
+
string_format("YaRN: low correction dim or beta (default: %.2f)", (double)params.yarn_beta_fast),
|
|
1941
1945
|
[](common_params & params, const std::string & value) {
|
|
1942
1946
|
params.yarn_beta_fast = std::stof(value);
|
|
1943
1947
|
}
|
|
@@ -3331,14 +3335,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3331
3335
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_DRAFT_MIN"));
|
|
3332
3336
|
add_opt(common_arg(
|
|
3333
3337
|
{"--draft-p-split"}, "P",
|
|
3334
|
-
string_format("speculative decoding split probability (default: %.
|
|
3338
|
+
string_format("speculative decoding split probability (default: %.2f)", (double)params.speculative.p_split),
|
|
3335
3339
|
[](common_params & params, const std::string & value) {
|
|
3336
3340
|
params.speculative.p_split = std::stof(value);
|
|
3337
3341
|
}
|
|
3338
3342
|
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT"));
|
|
3339
3343
|
add_opt(common_arg(
|
|
3340
3344
|
{"--draft-p-min"}, "P",
|
|
3341
|
-
string_format("minimum speculative decoding probability (greedy) (default: %.
|
|
3345
|
+
string_format("minimum speculative decoding probability (greedy) (default: %.2f)", (double)params.speculative.p_min),
|
|
3342
3346
|
[](common_params & params, const std::string & value) {
|
|
3343
3347
|
params.speculative.p_min = std::stof(value);
|
|
3344
3348
|
}
|
|
@@ -1666,7 +1666,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
|
|
|
1666
1666
|
}
|
|
1667
1667
|
auto msg = builder.result();
|
|
1668
1668
|
if (!is_partial) {
|
|
1669
|
-
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat
|
|
1669
|
+
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
|
|
1670
1670
|
}
|
|
1671
1671
|
return msg;
|
|
1672
1672
|
}
|
|
@@ -1699,7 +1699,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std
|
|
|
1699
1699
|
mapper.from_ast(ctx.ast, result);
|
|
1700
1700
|
}
|
|
1701
1701
|
if (!is_partial) {
|
|
1702
|
-
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat
|
|
1702
|
+
LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
|
|
1703
1703
|
}
|
|
1704
1704
|
return msg;
|
|
1705
1705
|
}
|