@fugood/llama.node 1.6.0-rc.2 → 1.6.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/package.json +15 -15
  2. package/scripts/llama.cpp.patch +15 -15
  3. package/src/llama.cpp/common/arg.cpp +25 -21
  4. package/src/llama.cpp/common/chat-parser.cpp +2 -2
  5. package/src/llama.cpp/common/chat.cpp +159 -139
  6. package/src/llama.cpp/common/chat.h +16 -9
  7. package/src/llama.cpp/common/jinja/caps.cpp +48 -5
  8. package/src/llama.cpp/common/jinja/caps.h +5 -1
  9. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +539 -7
  10. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +26 -12
  11. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +8 -0
  12. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +6 -3
  13. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -1
  14. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +345 -15
  15. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +21 -4
  16. package/src/llama.cpp/include/llama.h +3 -2
  17. package/src/llama.cpp/src/llama-context.cpp +6 -5
  18. package/src/llama.cpp/src/llama-graph.cpp +159 -18
  19. package/src/llama.cpp/src/llama-graph.h +54 -3
  20. package/src/llama.cpp/src/llama-hparams.cpp +17 -2
  21. package/src/llama.cpp/src/llama-hparams.h +10 -4
  22. package/src/llama.cpp/src/llama-kv-cache.cpp +34 -10
  23. package/src/llama.cpp/src/llama-model-saver.cpp +2 -2
  24. package/src/llama.cpp/src/llama-model.cpp +14 -16
  25. package/src/llama.cpp/src/llama-quant.cpp +53 -56
  26. package/src/llama.cpp/src/llama.cpp +50 -16
  27. package/src/llama.cpp/src/models/deepseek2.cpp +14 -14
  28. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +2 -2
  29. package/src/llama.cpp/src/models/minicpm3.cpp +1 -0
  30. package/src/llama.cpp/src/models/plm.cpp +1 -0
  31. package/src/llama.cpp/src/models/qwen3vl-moe.cpp +5 -14
  32. package/src/llama.cpp/src/models/qwen3vl.cpp +5 -14
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fugood/llama.node",
3
3
  "access": "public",
4
- "version": "1.6.0-rc.2",
4
+ "version": "1.6.0-rc.4",
5
5
  "description": "An another Node binding of llama.cpp",
6
6
  "main": "lib/index.js",
7
7
  "scripts": {
@@ -72,20 +72,20 @@
72
72
  "CMakeLists.txt"
73
73
  ],
74
74
  "optionalDependencies": {
75
- "@fugood/node-llama-darwin-arm64": "1.6.0-rc.2",
76
- "@fugood/node-llama-darwin-x64": "1.6.0-rc.2",
77
- "@fugood/node-llama-linux-arm64": "1.6.0-rc.2",
78
- "@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.2",
79
- "@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.2",
80
- "@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.2",
81
- "@fugood/node-llama-linux-x64": "1.6.0-rc.2",
82
- "@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.2",
83
- "@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.2",
84
- "@fugood/node-llama-win32-arm64": "1.6.0-rc.2",
85
- "@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.2",
86
- "@fugood/node-llama-win32-x64": "1.6.0-rc.2",
87
- "@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.2",
88
- "@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.2"
75
+ "@fugood/node-llama-darwin-arm64": "1.6.0-rc.4",
76
+ "@fugood/node-llama-darwin-x64": "1.6.0-rc.4",
77
+ "@fugood/node-llama-linux-arm64": "1.6.0-rc.4",
78
+ "@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.4",
79
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.4",
80
+ "@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.4",
81
+ "@fugood/node-llama-linux-x64": "1.6.0-rc.4",
82
+ "@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.4",
83
+ "@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.4",
84
+ "@fugood/node-llama-win32-arm64": "1.6.0-rc.4",
85
+ "@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.4",
86
+ "@fugood/node-llama-win32-x64": "1.6.0-rc.4",
87
+ "@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.4",
88
+ "@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.4"
89
89
  },
90
90
  "devDependencies": {
91
91
  "@babel/preset-env": "^7.24.4",
@@ -16,7 +16,7 @@ index ae02c0bd7..f74d8bb26 100644
16
16
  +
17
17
  +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
18
18
  diff --git a/src/llama.cpp/common/chat-parser.cpp b/src/llama.cpp/common/chat-parser.cpp
19
- index c2d1e30f3..e520bf26c 100644
19
+ index 29819e48d..2b6402489 100644
20
20
  --- a/src/llama.cpp/common/chat-parser.cpp
21
21
  +++ b/src/llama.cpp/common/chat-parser.cpp
22
22
  @@ -1515,6 +1515,39 @@ static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
@@ -83,10 +83,10 @@ index 1bcba9cd8..b7cd68734 100644
83
83
  static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
84
84
  int count = 0;
85
85
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
86
- index b29544dac..52bfa0e20 100644
86
+ index eeb38ad06..363119f83 100644
87
87
  --- a/src/llama.cpp/common/chat.cpp
88
88
  +++ b/src/llama.cpp/common/chat.cpp
89
- @@ -615,6 +615,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
89
+ @@ -574,6 +574,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
90
90
  return tmpls->template_default->source();
91
91
  }
92
92
 
@@ -124,7 +124,7 @@ index b29544dac..52bfa0e20 100644
124
124
  common_chat_templates_ptr common_chat_templates_init(
125
125
  const struct llama_model * model,
126
126
  const std::string & chat_template_override,
127
- @@ -740,6 +771,7 @@ const char * common_chat_format_name(common_chat_format format) {
127
+ @@ -699,6 +730,7 @@ const char * common_chat_format_name(common_chat_format format) {
128
128
  case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
129
129
  case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
130
130
  case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
@@ -132,7 +132,7 @@ index b29544dac..52bfa0e20 100644
132
132
  case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
133
133
  case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
134
134
  case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
135
- @@ -831,8 +863,9 @@ static std::string apply(
135
+ @@ -790,8 +822,9 @@ static std::string apply(
136
136
  if (inputs.add_generation_prompt) {
137
137
  inp["add_generation_prompt"] = true;
138
138
  }
@@ -144,7 +144,7 @@ index b29544dac..52bfa0e20 100644
144
144
  }
145
145
 
146
146
  jinja::global_from_json(ctx, inp, inputs.mark_input);
147
- @@ -2761,6 +2794,43 @@ static common_chat_params common_chat_params_init_seed_oss(
147
+ @@ -2695,6 +2728,43 @@ static common_chat_params common_chat_params_init_translate_gemma(const common_c
148
148
  return data;
149
149
  }
150
150
 
@@ -185,10 +185,10 @@ index b29544dac..52bfa0e20 100644
185
185
  + return data;
186
186
  +}
187
187
  +
188
- // various workarounds for known issues with certain templates or model behaviors
189
- // TODO @ngxson : improve this (how?)
190
- namespace workaround {
191
- @@ -3035,6 +3105,11 @@ static common_chat_params common_chat_templates_apply_jinja(
188
+ static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
189
+ common_chat_params data;
190
+ data.prompt = apply(tmpl, inputs);
191
+ @@ -3043,6 +3113,11 @@ static common_chat_params common_chat_templates_apply_jinja(
192
192
  return common_chat_params_init_apriel_1_5(tmpl, params);
193
193
  }
194
194
 
@@ -201,10 +201,10 @@ index b29544dac..52bfa0e20 100644
201
201
  // TODO: support that mix in handlers below.
202
202
  if ((params.tools.is_array() && params.json_schema.is_object())) {
203
203
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
204
- index ac19348ec..bd6030de8 100644
204
+ index 24aa4aab5..e02e22ae0 100644
205
205
  --- a/src/llama.cpp/common/chat.h
206
206
  +++ b/src/llama.cpp/common/chat.h
207
- @@ -126,6 +126,7 @@ enum common_chat_format {
207
+ @@ -133,6 +133,7 @@ enum common_chat_format {
208
208
  COMMON_CHAT_FORMAT_XIAOMI_MIMO,
209
209
  COMMON_CHAT_FORMAT_SOLAR_OPEN,
210
210
  COMMON_CHAT_FORMAT_EXAONE_MOE,
@@ -212,7 +212,7 @@ index ac19348ec..bd6030de8 100644
212
212
 
213
213
  // These are intended to be parsed by the PEG parser
214
214
  COMMON_CHAT_FORMAT_PEG_SIMPLE,
215
- @@ -231,6 +232,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
215
+ @@ -238,6 +239,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
216
216
 
217
217
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
218
218
 
@@ -231,8 +231,8 @@ index ac19348ec..bd6030de8 100644
231
231
  +bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
232
232
  +
233
233
  // Parses a JSON array of messages in OpenAI's chat completion API format.
234
- // T can be std::string containing JSON or nlohmann::ordered_json
235
- template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
234
+ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
235
+ nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
236
236
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
237
237
  index 26250abb6..72ceddcc7 100644
238
238
  --- a/src/llama.cpp/common/common.cpp
@@ -1231,6 +1231,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1231
1231
  string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx),
1232
1232
  [](common_params & params, int value) {
1233
1233
  params.n_ctx = value;
1234
+ if (value == 0) {
1235
+ // disable context reduction in llama_params_fit if the user explicitly requests the full context size:
1236
+ params.fit_params_min_ctx = UINT32_MAX;
1237
+ }
1234
1238
  }
1235
1239
  ).set_env("LLAMA_ARG_CTX_SIZE"));
1236
1240
  add_opt(common_arg(
@@ -1573,7 +1577,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1573
1577
  ).set_sparam());
1574
1578
  add_opt(common_arg(
1575
1579
  {"--temp"}, "N",
1576
- string_format("temperature (default: %.1f)", (double)params.sampling.temp),
1580
+ string_format("temperature (default: %.2f)", (double)params.sampling.temp),
1577
1581
  [](common_params & params, const std::string & value) {
1578
1582
  params.sampling.temp = std::stof(value);
1579
1583
  params.sampling.temp = std::max(params.sampling.temp, 0.0f);
@@ -1590,7 +1594,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1590
1594
  ).set_sparam().set_env("LLAMA_ARG_TOP_K"));
1591
1595
  add_opt(common_arg(
1592
1596
  {"--top-p"}, "N",
1593
- string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p),
1597
+ string_format("top-p sampling (default: %.2f, 1.0 = disabled)", (double)params.sampling.top_p),
1594
1598
  [](common_params & params, const std::string & value) {
1595
1599
  params.sampling.top_p = std::stof(value);
1596
1600
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
@@ -1598,7 +1602,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1598
1602
  ).set_sparam());
1599
1603
  add_opt(common_arg(
1600
1604
  {"--min-p"}, "N",
1601
- string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p),
1605
+ string_format("min-p sampling (default: %.2f, 0.0 = disabled)", (double)params.sampling.min_p),
1602
1606
  [](common_params & params, const std::string & value) {
1603
1607
  params.sampling.min_p = std::stof(value);
1604
1608
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
@@ -1606,14 +1610,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1606
1610
  ).set_sparam());
1607
1611
  add_opt(common_arg(
1608
1612
  {"--top-nsigma"}, "N",
1609
- string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma),
1613
+ string_format("top-n-sigma sampling (default: %.2f, -1.0 = disabled)", params.sampling.top_n_sigma),
1610
1614
  [](common_params & params, const std::string & value) {
1611
1615
  params.sampling.top_n_sigma = std::stof(value);
1612
1616
  }
1613
1617
  ).set_sparam());
1614
1618
  add_opt(common_arg(
1615
1619
  {"--xtc-probability"}, "N",
1616
- string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
1620
+ string_format("xtc probability (default: %.2f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
1617
1621
  [](common_params & params, const std::string & value) {
1618
1622
  params.sampling.xtc_probability = std::stof(value);
1619
1623
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
@@ -1621,7 +1625,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1621
1625
  ).set_sparam());
1622
1626
  add_opt(common_arg(
1623
1627
  {"--xtc-threshold"}, "N",
1624
- string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
1628
+ string_format("xtc threshold (default: %.2f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
1625
1629
  [](common_params & params, const std::string & value) {
1626
1630
  params.sampling.xtc_threshold = std::stof(value);
1627
1631
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
@@ -1629,7 +1633,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1629
1633
  ).set_sparam());
1630
1634
  add_opt(common_arg(
1631
1635
  {"--typical"}, "N",
1632
- string_format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sampling.typ_p),
1636
+ string_format("locally typical sampling, parameter p (default: %.2f, 1.0 = disabled)", (double)params.sampling.typ_p),
1633
1637
  [](common_params & params, const std::string & value) {
1634
1638
  params.sampling.typ_p = std::stof(value);
1635
1639
  }
@@ -1648,7 +1652,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1648
1652
  ).set_sparam());
1649
1653
  add_opt(common_arg(
1650
1654
  {"--repeat-penalty"}, "N",
1651
- string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
1655
+ string_format("penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
1652
1656
  [](common_params & params, const std::string & value) {
1653
1657
  params.sampling.penalty_repeat = std::stof(value);
1654
1658
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
@@ -1656,21 +1660,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1656
1660
  ).set_sparam());
1657
1661
  add_opt(common_arg(
1658
1662
  {"--presence-penalty"}, "N",
1659
- string_format("repeat alpha presence penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_present),
1663
+ string_format("repeat alpha presence penalty (default: %.2f, 0.0 = disabled)", (double)params.sampling.penalty_present),
1660
1664
  [](common_params & params, const std::string & value) {
1661
1665
  params.sampling.penalty_present = std::stof(value);
1662
1666
  }
1663
1667
  ).set_sparam());
1664
1668
  add_opt(common_arg(
1665
1669
  {"--frequency-penalty"}, "N",
1666
- string_format("repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_freq),
1670
+ string_format("repeat alpha frequency penalty (default: %.2f, 0.0 = disabled)", (double)params.sampling.penalty_freq),
1667
1671
  [](common_params & params, const std::string & value) {
1668
1672
  params.sampling.penalty_freq = std::stof(value);
1669
1673
  }
1670
1674
  ).set_sparam());
1671
1675
  add_opt(common_arg(
1672
1676
  {"--dry-multiplier"}, "N",
1673
- string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sampling.dry_multiplier),
1677
+ string_format("set DRY sampling multiplier (default: %.2f, 0.0 = disabled)", (double)params.sampling.dry_multiplier),
1674
1678
  [](common_params & params, const std::string & value) {
1675
1679
  params.sampling.dry_multiplier = std::stof(value);
1676
1680
  }
@@ -1751,14 +1755,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1751
1755
  ).set_sparam());
1752
1756
  add_opt(common_arg(
1753
1757
  {"--dynatemp-range"}, "N",
1754
- string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
1758
+ string_format("dynamic temperature range (default: %.2f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
1755
1759
  [](common_params & params, const std::string & value) {
1756
1760
  params.sampling.dynatemp_range = std::stof(value);
1757
1761
  }
1758
1762
  ).set_sparam());
1759
1763
  add_opt(common_arg(
1760
1764
  {"--dynatemp-exp"}, "N",
1761
- string_format("dynamic temperature exponent (default: %.1f)", (double)params.sampling.dynatemp_exponent),
1765
+ string_format("dynamic temperature exponent (default: %.2f)", (double)params.sampling.dynatemp_exponent),
1762
1766
  [](common_params & params, const std::string & value) {
1763
1767
  params.sampling.dynatemp_exponent = std::stof(value);
1764
1768
  }
@@ -1774,7 +1778,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1774
1778
  ).set_sparam());
1775
1779
  add_opt(common_arg(
1776
1780
  {"--mirostat-lr"}, "N",
1777
- string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta),
1781
+ string_format("Mirostat learning rate, parameter eta (default: %.2f)", (double)params.sampling.mirostat_eta),
1778
1782
  [](common_params & params, const std::string & value) {
1779
1783
  params.sampling.mirostat_eta = std::stof(value);
1780
1784
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
@@ -1782,7 +1786,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1782
1786
  ).set_sparam());
1783
1787
  add_opt(common_arg(
1784
1788
  {"--mirostat-ent"}, "N",
1785
- string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau),
1789
+ string_format("Mirostat target entropy, parameter tau (default: %.2f)", (double)params.sampling.mirostat_tau),
1786
1790
  [](common_params & params, const std::string & value) {
1787
1791
  params.sampling.mirostat_tau = std::stof(value);
1788
1792
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
@@ -1916,28 +1920,28 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1916
1920
  ).set_env("LLAMA_ARG_YARN_ORIG_CTX"));
1917
1921
  add_opt(common_arg(
1918
1922
  {"--yarn-ext-factor"}, "N",
1919
- string_format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
1923
+ string_format("YaRN: extrapolation mix factor (default: %.2f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
1920
1924
  [](common_params & params, const std::string & value) {
1921
1925
  params.yarn_ext_factor = std::stof(value);
1922
1926
  }
1923
1927
  ).set_env("LLAMA_ARG_YARN_EXT_FACTOR"));
1924
1928
  add_opt(common_arg(
1925
1929
  {"--yarn-attn-factor"}, "N",
1926
- string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor),
1930
+ string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.2f)", (double)params.yarn_attn_factor),
1927
1931
  [](common_params & params, const std::string & value) {
1928
1932
  params.yarn_attn_factor = std::stof(value);
1929
1933
  }
1930
1934
  ).set_env("LLAMA_ARG_YARN_ATTN_FACTOR"));
1931
1935
  add_opt(common_arg(
1932
1936
  {"--yarn-beta-slow"}, "N",
1933
- string_format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow),
1937
+ string_format("YaRN: high correction dim or alpha (default: %.2f)", (double)params.yarn_beta_slow),
1934
1938
  [](common_params & params, const std::string & value) {
1935
1939
  params.yarn_beta_slow = std::stof(value);
1936
1940
  }
1937
1941
  ).set_env("LLAMA_ARG_YARN_BETA_SLOW"));
1938
1942
  add_opt(common_arg(
1939
1943
  {"--yarn-beta-fast"}, "N",
1940
- string_format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast),
1944
+ string_format("YaRN: low correction dim or beta (default: %.2f)", (double)params.yarn_beta_fast),
1941
1945
  [](common_params & params, const std::string & value) {
1942
1946
  params.yarn_beta_fast = std::stof(value);
1943
1947
  }
@@ -3331,14 +3335,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
3331
3335
  ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_DRAFT_MIN"));
3332
3336
  add_opt(common_arg(
3333
3337
  {"--draft-p-split"}, "P",
3334
- string_format("speculative decoding split probability (default: %.1f)", (double)params.speculative.p_split),
3338
+ string_format("speculative decoding split probability (default: %.2f)", (double)params.speculative.p_split),
3335
3339
  [](common_params & params, const std::string & value) {
3336
3340
  params.speculative.p_split = std::stof(value);
3337
3341
  }
3338
3342
  ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT"));
3339
3343
  add_opt(common_arg(
3340
3344
  {"--draft-p-min"}, "P",
3341
- string_format("minimum speculative decoding probability (greedy) (default: %.1f)", (double)params.speculative.p_min),
3345
+ string_format("minimum speculative decoding probability (greedy) (default: %.2f)", (double)params.speculative.p_min),
3342
3346
  [](common_params & params, const std::string & value) {
3343
3347
  params.speculative.p_min = std::stof(value);
3344
3348
  }
@@ -1666,7 +1666,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
1666
1666
  }
1667
1667
  auto msg = builder.result();
1668
1668
  if (!is_partial) {
1669
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1669
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1670
1670
  }
1671
1671
  return msg;
1672
1672
  }
@@ -1699,7 +1699,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std
1699
1699
  mapper.from_ast(ctx.ast, result);
1700
1700
  }
1701
1701
  if (!is_partial) {
1702
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1702
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1703
1703
  }
1704
1704
  return msg;
1705
1705
  }