@fugood/llama.node 1.6.0-rc.2 → 1.6.0-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/package.json +15 -15
  2. package/scripts/llama.cpp.patch +15 -15
  3. package/src/llama.cpp/common/arg.cpp +25 -21
  4. package/src/llama.cpp/common/chat-parser.cpp +2 -2
  5. package/src/llama.cpp/common/chat.cpp +159 -139
  6. package/src/llama.cpp/common/chat.h +16 -9
  7. package/src/llama.cpp/common/jinja/caps.cpp +48 -5
  8. package/src/llama.cpp/common/jinja/caps.h +5 -1
  9. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +539 -7
  10. package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +26 -12
  11. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +8 -0
  12. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +6 -3
  13. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -1
  14. package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +345 -15
  15. package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +21 -4
  16. package/src/llama.cpp/include/llama.h +3 -2
  17. package/src/llama.cpp/src/llama-context.cpp +6 -5
  18. package/src/llama.cpp/src/llama-graph.cpp +159 -18
  19. package/src/llama.cpp/src/llama-graph.h +54 -3
  20. package/src/llama.cpp/src/llama-hparams.cpp +17 -2
  21. package/src/llama.cpp/src/llama-hparams.h +10 -4
  22. package/src/llama.cpp/src/llama-kv-cache.cpp +34 -10
  23. package/src/llama.cpp/src/llama-model-saver.cpp +2 -2
  24. package/src/llama.cpp/src/llama-model.cpp +14 -16
  25. package/src/llama.cpp/src/llama-quant.cpp +53 -56
  26. package/src/llama.cpp/src/llama.cpp +50 -16
  27. package/src/llama.cpp/src/models/deepseek2.cpp +14 -14
  28. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +2 -2
  29. package/src/llama.cpp/src/models/minicpm3.cpp +1 -0
  30. package/src/llama.cpp/src/models/plm.cpp +1 -0
  31. package/src/llama.cpp/src/models/qwen3vl-moe.cpp +5 -14
  32. package/src/llama.cpp/src/models/qwen3vl.cpp +5 -14
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@fugood/llama.node",
3
3
  "access": "public",
4
- "version": "1.6.0-rc.2",
4
+ "version": "1.6.0-rc.4",
5
5
  "description": "An another Node binding of llama.cpp",
6
6
  "main": "lib/index.js",
7
7
  "scripts": {
@@ -72,20 +72,20 @@
72
72
  "CMakeLists.txt"
73
73
  ],
74
74
  "optionalDependencies": {
75
- "@fugood/node-llama-darwin-arm64": "1.6.0-rc.2",
76
- "@fugood/node-llama-darwin-x64": "1.6.0-rc.2",
77
- "@fugood/node-llama-linux-arm64": "1.6.0-rc.2",
78
- "@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.2",
79
- "@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.2",
80
- "@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.2",
81
- "@fugood/node-llama-linux-x64": "1.6.0-rc.2",
82
- "@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.2",
83
- "@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.2",
84
- "@fugood/node-llama-win32-arm64": "1.6.0-rc.2",
85
- "@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.2",
86
- "@fugood/node-llama-win32-x64": "1.6.0-rc.2",
87
- "@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.2",
88
- "@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.2"
75
+ "@fugood/node-llama-darwin-arm64": "1.6.0-rc.4",
76
+ "@fugood/node-llama-darwin-x64": "1.6.0-rc.4",
77
+ "@fugood/node-llama-linux-arm64": "1.6.0-rc.4",
78
+ "@fugood/node-llama-linux-arm64-cuda": "1.6.0-rc.4",
79
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.6.0-rc.4",
80
+ "@fugood/node-llama-linux-arm64-vulkan": "1.6.0-rc.4",
81
+ "@fugood/node-llama-linux-x64": "1.6.0-rc.4",
82
+ "@fugood/node-llama-linux-x64-cuda": "1.6.0-rc.4",
83
+ "@fugood/node-llama-linux-x64-vulkan": "1.6.0-rc.4",
84
+ "@fugood/node-llama-win32-arm64": "1.6.0-rc.4",
85
+ "@fugood/node-llama-win32-arm64-vulkan": "1.6.0-rc.4",
86
+ "@fugood/node-llama-win32-x64": "1.6.0-rc.4",
87
+ "@fugood/node-llama-win32-x64-cuda": "1.6.0-rc.4",
88
+ "@fugood/node-llama-win32-x64-vulkan": "1.6.0-rc.4"
89
89
  },
90
90
  "devDependencies": {
91
91
  "@babel/preset-env": "^7.24.4",
@@ -16,7 +16,7 @@ index ae02c0bd7..f74d8bb26 100644
16
16
  +
17
17
  +target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
18
18
  diff --git a/src/llama.cpp/common/chat-parser.cpp b/src/llama.cpp/common/chat-parser.cpp
19
- index c2d1e30f3..e520bf26c 100644
19
+ index 29819e48d..2b6402489 100644
20
20
  --- a/src/llama.cpp/common/chat-parser.cpp
21
21
  +++ b/src/llama.cpp/common/chat-parser.cpp
22
22
  @@ -1515,6 +1515,39 @@ static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
@@ -83,10 +83,10 @@ index 1bcba9cd8..b7cd68734 100644
83
83
  static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
84
84
  int count = 0;
85
85
  diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
86
- index b29544dac..52bfa0e20 100644
86
+ index eeb38ad06..363119f83 100644
87
87
  --- a/src/llama.cpp/common/chat.cpp
88
88
  +++ b/src/llama.cpp/common/chat.cpp
89
- @@ -615,6 +615,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
89
+ @@ -574,6 +574,37 @@ std::string common_chat_templates_source(const struct common_chat_templates * tm
90
90
  return tmpls->template_default->source();
91
91
  }
92
92
 
@@ -124,7 +124,7 @@ index b29544dac..52bfa0e20 100644
124
124
  common_chat_templates_ptr common_chat_templates_init(
125
125
  const struct llama_model * model,
126
126
  const std::string & chat_template_override,
127
- @@ -740,6 +771,7 @@ const char * common_chat_format_name(common_chat_format format) {
127
+ @@ -699,6 +730,7 @@ const char * common_chat_format_name(common_chat_format format) {
128
128
  case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
129
129
  case COMMON_CHAT_FORMAT_SOLAR_OPEN: return "Solar Open";
130
130
  case COMMON_CHAT_FORMAT_EXAONE_MOE: return "EXAONE MoE";
@@ -132,7 +132,7 @@ index b29544dac..52bfa0e20 100644
132
132
  case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
133
133
  case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
134
134
  case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
135
- @@ -831,8 +863,9 @@ static std::string apply(
135
+ @@ -790,8 +822,9 @@ static std::string apply(
136
136
  if (inputs.add_generation_prompt) {
137
137
  inp["add_generation_prompt"] = true;
138
138
  }
@@ -144,7 +144,7 @@ index b29544dac..52bfa0e20 100644
144
144
  }
145
145
 
146
146
  jinja::global_from_json(ctx, inp, inputs.mark_input);
147
- @@ -2761,6 +2794,43 @@ static common_chat_params common_chat_params_init_seed_oss(
147
+ @@ -2695,6 +2728,43 @@ static common_chat_params common_chat_params_init_translate_gemma(const common_c
148
148
  return data;
149
149
  }
150
150
 
@@ -185,10 +185,10 @@ index b29544dac..52bfa0e20 100644
185
185
  + return data;
186
186
  +}
187
187
  +
188
- // various workarounds for known issues with certain templates or model behaviors
189
- // TODO @ngxson : improve this (how?)
190
- namespace workaround {
191
- @@ -3035,6 +3105,11 @@ static common_chat_params common_chat_templates_apply_jinja(
188
+ static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
189
+ common_chat_params data;
190
+ data.prompt = apply(tmpl, inputs);
191
+ @@ -3043,6 +3113,11 @@ static common_chat_params common_chat_templates_apply_jinja(
192
192
  return common_chat_params_init_apriel_1_5(tmpl, params);
193
193
  }
194
194
 
@@ -201,10 +201,10 @@ index b29544dac..52bfa0e20 100644
201
201
  // TODO: support that mix in handlers below.
202
202
  if ((params.tools.is_array() && params.json_schema.is_object())) {
203
203
  diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
204
- index ac19348ec..bd6030de8 100644
204
+ index 24aa4aab5..e02e22ae0 100644
205
205
  --- a/src/llama.cpp/common/chat.h
206
206
  +++ b/src/llama.cpp/common/chat.h
207
- @@ -126,6 +126,7 @@ enum common_chat_format {
207
+ @@ -133,6 +133,7 @@ enum common_chat_format {
208
208
  COMMON_CHAT_FORMAT_XIAOMI_MIMO,
209
209
  COMMON_CHAT_FORMAT_SOLAR_OPEN,
210
210
  COMMON_CHAT_FORMAT_EXAONE_MOE,
@@ -212,7 +212,7 @@ index ac19348ec..bd6030de8 100644
212
212
 
213
213
  // These are intended to be parsed by the PEG parser
214
214
  COMMON_CHAT_FORMAT_PEG_SIMPLE,
215
- @@ -231,6 +232,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
215
+ @@ -238,6 +239,20 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
216
216
 
217
217
  bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
218
218
 
@@ -231,8 +231,8 @@ index ac19348ec..bd6030de8 100644
231
231
  +bool common_chat_templates_has_variant(const struct common_chat_templates * tmpls, const std::string & variant);
232
232
  +
233
233
  // Parses a JSON array of messages in OpenAI's chat completion API format.
234
- // T can be std::string containing JSON or nlohmann::ordered_json
235
- template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
234
+ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);
235
+ nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
236
236
  diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
237
237
  index 26250abb6..72ceddcc7 100644
238
238
  --- a/src/llama.cpp/common/common.cpp
@@ -1231,6 +1231,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1231
1231
  string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx),
1232
1232
  [](common_params & params, int value) {
1233
1233
  params.n_ctx = value;
1234
+ if (value == 0) {
1235
+ // disable context reduction in llama_params_fit if the user explicitly requests the full context size:
1236
+ params.fit_params_min_ctx = UINT32_MAX;
1237
+ }
1234
1238
  }
1235
1239
  ).set_env("LLAMA_ARG_CTX_SIZE"));
1236
1240
  add_opt(common_arg(
@@ -1573,7 +1577,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1573
1577
  ).set_sparam());
1574
1578
  add_opt(common_arg(
1575
1579
  {"--temp"}, "N",
1576
- string_format("temperature (default: %.1f)", (double)params.sampling.temp),
1580
+ string_format("temperature (default: %.2f)", (double)params.sampling.temp),
1577
1581
  [](common_params & params, const std::string & value) {
1578
1582
  params.sampling.temp = std::stof(value);
1579
1583
  params.sampling.temp = std::max(params.sampling.temp, 0.0f);
@@ -1590,7 +1594,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1590
1594
  ).set_sparam().set_env("LLAMA_ARG_TOP_K"));
1591
1595
  add_opt(common_arg(
1592
1596
  {"--top-p"}, "N",
1593
- string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p),
1597
+ string_format("top-p sampling (default: %.2f, 1.0 = disabled)", (double)params.sampling.top_p),
1594
1598
  [](common_params & params, const std::string & value) {
1595
1599
  params.sampling.top_p = std::stof(value);
1596
1600
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P;
@@ -1598,7 +1602,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1598
1602
  ).set_sparam());
1599
1603
  add_opt(common_arg(
1600
1604
  {"--min-p"}, "N",
1601
- string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p),
1605
+ string_format("min-p sampling (default: %.2f, 0.0 = disabled)", (double)params.sampling.min_p),
1602
1606
  [](common_params & params, const std::string & value) {
1603
1607
  params.sampling.min_p = std::stof(value);
1604
1608
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P;
@@ -1606,14 +1610,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1606
1610
  ).set_sparam());
1607
1611
  add_opt(common_arg(
1608
1612
  {"--top-nsigma"}, "N",
1609
- string_format("top-n-sigma sampling (default: %.1f, -1.0 = disabled)", params.sampling.top_n_sigma),
1613
+ string_format("top-n-sigma sampling (default: %.2f, -1.0 = disabled)", params.sampling.top_n_sigma),
1610
1614
  [](common_params & params, const std::string & value) {
1611
1615
  params.sampling.top_n_sigma = std::stof(value);
1612
1616
  }
1613
1617
  ).set_sparam());
1614
1618
  add_opt(common_arg(
1615
1619
  {"--xtc-probability"}, "N",
1616
- string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
1620
+ string_format("xtc probability (default: %.2f, 0.0 = disabled)", (double)params.sampling.xtc_probability),
1617
1621
  [](common_params & params, const std::string & value) {
1618
1622
  params.sampling.xtc_probability = std::stof(value);
1619
1623
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY;
@@ -1621,7 +1625,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1621
1625
  ).set_sparam());
1622
1626
  add_opt(common_arg(
1623
1627
  {"--xtc-threshold"}, "N",
1624
- string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
1628
+ string_format("xtc threshold (default: %.2f, 1.0 = disabled)", (double)params.sampling.xtc_threshold),
1625
1629
  [](common_params & params, const std::string & value) {
1626
1630
  params.sampling.xtc_threshold = std::stof(value);
1627
1631
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD;
@@ -1629,7 +1633,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1629
1633
  ).set_sparam());
1630
1634
  add_opt(common_arg(
1631
1635
  {"--typical"}, "N",
1632
- string_format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sampling.typ_p),
1636
+ string_format("locally typical sampling, parameter p (default: %.2f, 1.0 = disabled)", (double)params.sampling.typ_p),
1633
1637
  [](common_params & params, const std::string & value) {
1634
1638
  params.sampling.typ_p = std::stof(value);
1635
1639
  }
@@ -1648,7 +1652,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1648
1652
  ).set_sparam());
1649
1653
  add_opt(common_arg(
1650
1654
  {"--repeat-penalty"}, "N",
1651
- string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
1655
+ string_format("penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)", (double)params.sampling.penalty_repeat),
1652
1656
  [](common_params & params, const std::string & value) {
1653
1657
  params.sampling.penalty_repeat = std::stof(value);
1654
1658
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT;
@@ -1656,21 +1660,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1656
1660
  ).set_sparam());
1657
1661
  add_opt(common_arg(
1658
1662
  {"--presence-penalty"}, "N",
1659
- string_format("repeat alpha presence penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_present),
1663
+ string_format("repeat alpha presence penalty (default: %.2f, 0.0 = disabled)", (double)params.sampling.penalty_present),
1660
1664
  [](common_params & params, const std::string & value) {
1661
1665
  params.sampling.penalty_present = std::stof(value);
1662
1666
  }
1663
1667
  ).set_sparam());
1664
1668
  add_opt(common_arg(
1665
1669
  {"--frequency-penalty"}, "N",
1666
- string_format("repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_freq),
1670
+ string_format("repeat alpha frequency penalty (default: %.2f, 0.0 = disabled)", (double)params.sampling.penalty_freq),
1667
1671
  [](common_params & params, const std::string & value) {
1668
1672
  params.sampling.penalty_freq = std::stof(value);
1669
1673
  }
1670
1674
  ).set_sparam());
1671
1675
  add_opt(common_arg(
1672
1676
  {"--dry-multiplier"}, "N",
1673
- string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sampling.dry_multiplier),
1677
+ string_format("set DRY sampling multiplier (default: %.2f, 0.0 = disabled)", (double)params.sampling.dry_multiplier),
1674
1678
  [](common_params & params, const std::string & value) {
1675
1679
  params.sampling.dry_multiplier = std::stof(value);
1676
1680
  }
@@ -1751,14 +1755,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1751
1755
  ).set_sparam());
1752
1756
  add_opt(common_arg(
1753
1757
  {"--dynatemp-range"}, "N",
1754
- string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
1758
+ string_format("dynamic temperature range (default: %.2f, 0.0 = disabled)", (double)params.sampling.dynatemp_range),
1755
1759
  [](common_params & params, const std::string & value) {
1756
1760
  params.sampling.dynatemp_range = std::stof(value);
1757
1761
  }
1758
1762
  ).set_sparam());
1759
1763
  add_opt(common_arg(
1760
1764
  {"--dynatemp-exp"}, "N",
1761
- string_format("dynamic temperature exponent (default: %.1f)", (double)params.sampling.dynatemp_exponent),
1765
+ string_format("dynamic temperature exponent (default: %.2f)", (double)params.sampling.dynatemp_exponent),
1762
1766
  [](common_params & params, const std::string & value) {
1763
1767
  params.sampling.dynatemp_exponent = std::stof(value);
1764
1768
  }
@@ -1774,7 +1778,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1774
1778
  ).set_sparam());
1775
1779
  add_opt(common_arg(
1776
1780
  {"--mirostat-lr"}, "N",
1777
- string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta),
1781
+ string_format("Mirostat learning rate, parameter eta (default: %.2f)", (double)params.sampling.mirostat_eta),
1778
1782
  [](common_params & params, const std::string & value) {
1779
1783
  params.sampling.mirostat_eta = std::stof(value);
1780
1784
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA;
@@ -1782,7 +1786,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1782
1786
  ).set_sparam());
1783
1787
  add_opt(common_arg(
1784
1788
  {"--mirostat-ent"}, "N",
1785
- string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau),
1789
+ string_format("Mirostat target entropy, parameter tau (default: %.2f)", (double)params.sampling.mirostat_tau),
1786
1790
  [](common_params & params, const std::string & value) {
1787
1791
  params.sampling.mirostat_tau = std::stof(value);
1788
1792
  params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU;
@@ -1916,28 +1920,28 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
1916
1920
  ).set_env("LLAMA_ARG_YARN_ORIG_CTX"));
1917
1921
  add_opt(common_arg(
1918
1922
  {"--yarn-ext-factor"}, "N",
1919
- string_format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
1923
+ string_format("YaRN: extrapolation mix factor (default: %.2f, 0.0 = full interpolation)", (double)params.yarn_ext_factor),
1920
1924
  [](common_params & params, const std::string & value) {
1921
1925
  params.yarn_ext_factor = std::stof(value);
1922
1926
  }
1923
1927
  ).set_env("LLAMA_ARG_YARN_EXT_FACTOR"));
1924
1928
  add_opt(common_arg(
1925
1929
  {"--yarn-attn-factor"}, "N",
1926
- string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor),
1930
+ string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.2f)", (double)params.yarn_attn_factor),
1927
1931
  [](common_params & params, const std::string & value) {
1928
1932
  params.yarn_attn_factor = std::stof(value);
1929
1933
  }
1930
1934
  ).set_env("LLAMA_ARG_YARN_ATTN_FACTOR"));
1931
1935
  add_opt(common_arg(
1932
1936
  {"--yarn-beta-slow"}, "N",
1933
- string_format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow),
1937
+ string_format("YaRN: high correction dim or alpha (default: %.2f)", (double)params.yarn_beta_slow),
1934
1938
  [](common_params & params, const std::string & value) {
1935
1939
  params.yarn_beta_slow = std::stof(value);
1936
1940
  }
1937
1941
  ).set_env("LLAMA_ARG_YARN_BETA_SLOW"));
1938
1942
  add_opt(common_arg(
1939
1943
  {"--yarn-beta-fast"}, "N",
1940
- string_format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast),
1944
+ string_format("YaRN: low correction dim or beta (default: %.2f)", (double)params.yarn_beta_fast),
1941
1945
  [](common_params & params, const std::string & value) {
1942
1946
  params.yarn_beta_fast = std::stof(value);
1943
1947
  }
@@ -3331,14 +3335,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
3331
3335
  ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_DRAFT_MIN"));
3332
3336
  add_opt(common_arg(
3333
3337
  {"--draft-p-split"}, "P",
3334
- string_format("speculative decoding split probability (default: %.1f)", (double)params.speculative.p_split),
3338
+ string_format("speculative decoding split probability (default: %.2f)", (double)params.speculative.p_split),
3335
3339
  [](common_params & params, const std::string & value) {
3336
3340
  params.speculative.p_split = std::stof(value);
3337
3341
  }
3338
3342
  ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT"));
3339
3343
  add_opt(common_arg(
3340
3344
  {"--draft-p-min"}, "P",
3341
- string_format("minimum speculative decoding probability (greedy) (default: %.1f)", (double)params.speculative.p_min),
3345
+ string_format("minimum speculative decoding probability (greedy) (default: %.2f)", (double)params.speculative.p_min),
3342
3346
  [](common_params & params, const std::string & value) {
3343
3347
  params.speculative.p_min = std::stof(value);
3344
3348
  }
@@ -1666,7 +1666,7 @@ common_chat_msg common_chat_parse(const std::string & input, bool is_partial, co
1666
1666
  }
1667
1667
  auto msg = builder.result();
1668
1668
  if (!is_partial) {
1669
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1669
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1670
1670
  }
1671
1671
  return msg;
1672
1672
  }
@@ -1699,7 +1699,7 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std
1699
1699
  mapper.from_ast(ctx.ast, result);
1700
1700
  }
1701
1701
  if (!is_partial) {
1702
- LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat<json>({msg}).at(0).dump().c_str());
1702
+ LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
1703
1703
  }
1704
1704
  return msg;
1705
1705
  }