@fugood/llama.node 1.4.6 → 1.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
 #include "models.h"
 
-llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
+template <bool iswa>
+llm_build_gemma3<iswa>::llm_build_gemma3(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
     const int64_t n_embd_head = hparams.n_embd_head_k;
 
     ggml_tensor * cur;
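This release generalizes the Gemma 3 graph builder: the iSWA-only class llm_build_gemma3_iswa becomes the template llm_build_gemma3<bool iswa>, covering both the interleaved sliding-window-attention variant (iswa == true) and a plain KV-cache variant (iswa == false). The next hunk selects the attention input with std::conditional_t and if constexpr; here is a minimal self-contained sketch of that idiom, with stand-in types rather than the library's own:

#include <cstdio>
#include <type_traits>

// Stand-ins for the real llm_graph_input_attn_kv / llm_graph_input_attn_kv_iswa types.
struct kv_input      { const char * name = "kv";      };
struct kv_iswa_input { const char * name = "kv_iswa"; };

template <bool iswa>
void build_demo() {
    // std::conditional_t picks the input type at compile time; `if constexpr`
    // likewise drops the untaken branch, so each instantiation only references
    // the builder it actually calls.
    using input_t = std::conditional_t<iswa, kv_iswa_input, kv_input>;
    input_t inp;
    std::printf("attn input: %s\n", inp.name);
}

int main() {
    build_demo<true>();   // attn input: kv_iswa
    build_demo<false>();  // attn input: kv
}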
@@ -17,13 +18,28 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
     ggml_tensor * inp_pos = build_inp_pos();
 
     // TODO: is causal == true correct? might need some changes
-    auto * inp_attn = build_attn_inp_kv_iswa();
+    using inp_attn_type = std::conditional_t<iswa, llm_graph_input_attn_kv_iswa, llm_graph_input_attn_kv>;
+    inp_attn_type * inp_attn = nullptr;
+
+    if constexpr (iswa) {
+        inp_attn = build_attn_inp_kv_iswa();
+    } else {
+        inp_attn = build_attn_inp_kv();
+    }
 
     ggml_tensor * inp_out_ids = build_inp_out_ids();
 
     for (int il = 0; il < n_layer; ++il) {
-        const float freq_base_l  = model.get_rope_freq_base (cparams, il);
-        const float freq_scale_l = model.get_rope_freq_scale(cparams, il);
+        float freq_base_l  = 0.0f;
+        float freq_scale_l = 0.0f;
+
+        if constexpr (iswa) {
+            freq_base_l  = model.get_rope_freq_base (cparams, il);
+            freq_scale_l = model.get_rope_freq_scale(cparams, il);
+        } else {
+            freq_base_l  = freq_base;
+            freq_scale_l = freq_scale;
+        }
 
         // norm
         cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
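This hunk makes the RoPE parameters conditional: with iswa the model supplies per-layer values (Gemma 3 interleaves sliding-window layers, which use a local RoPE base, with full-attention layers, which use a long-context base), while the non-iswa variant falls back to the single global freq_base/freq_scale. A hedged sketch of per-layer selection; the helper name, layer pattern, and base values are illustrative, not read from this diff:

#include <cstdio>

// Hypothetical helper, not the library API: choose a per-layer RoPE base.
static float rope_freq_base_for_layer(bool is_swa_layer, float base_local, float base_full) {
    return is_swa_layer ? base_local : base_full;
}

int main() {
    // Illustrative Gemma 3-style pattern: every 6th layer is full attention;
    // local layers use a 10k base, full-attention layers a 1M base.
    for (int il = 0; il < 12; ++il) {
        const bool swa = (il % 6) != 5;
        std::printf("layer %2d: %s freq_base = %.0f\n",
                    il, swa ? "swa, " : "full,",
                    rope_freq_base_for_layer(swa, 10000.0f, 1000000.0f));
    }
}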
@@ -102,7 +118,7 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
         cur = build_norm(cur,
                 model.layers[il].ffn_post_norm, NULL,
                 LLM_NORM_RMS, -1);
-        cb(cur, "ffn_post_norm", -1);
+        cb(cur, "ffn_post_norm", il);
 
         cur = ggml_add(ctx0, cur, sa_out);
 
@@ -124,8 +140,17 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
     // lm_head
     cur = build_lora_mm(model.output, cur);
 
+    if (hparams.f_final_logit_softcapping) {
+        cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_final_logit_softcapping);
+        cur = ggml_tanh(ctx0, cur);
+        cur = ggml_scale(ctx0, cur, hparams.f_final_logit_softcapping);
+    }
+
     cb(cur, "result_output", -1);
     res->t_logits = cur;
 
     ggml_build_forward_expand(gf, cur);
 }
+
+template struct llm_build_gemma3<false>;
+template struct llm_build_gemma3<true>;
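The added block implements final-logit softcapping, composed from the three graph ops above: logits become f_final_logit_softcapping * tanh(logits / f_final_logit_softcapping), which bounds each logit to (-cap, cap) while staying nearly linear near zero; the guard skips the ops when the hparam is unset (0.0f). A standalone numeric sketch, with an illustrative cap value:

#include <cmath>
#include <cstdio>

int main() {
    // Stands in for hparams.f_final_logit_softcapping; the value is illustrative.
    const float cap   = 30.0f;
    const float raw[] = { 1.5f, 25.0f, 120.0f };
    for (const float x : raw) {
        // Same composition as the graph ops above: scale -> tanh -> scale.
        const float capped = cap * std::tanh(x / cap);
        std::printf("%7.1f -> %8.3f\n", x, capped);
    }
}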
@@ -179,8 +179,9 @@ struct llm_build_gemma2_iswa : public llm_graph_context {
     llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
 };
 
-struct llm_build_gemma3_iswa : public llm_graph_context {
-    llm_build_gemma3_iswa(const llama_model & model, const llm_graph_params & params);
+template <bool iswa>
+struct llm_build_gemma3 : public llm_graph_context {
+    llm_build_gemma3(const llama_model & model, const llm_graph_params & params);
 };
 
 struct llm_build_gemma3n_iswa : public llm_graph_context {
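Taken together with the explicit instantiations at the end of the source-file diff (template struct llm_build_gemma3<false> and <true>), this header hunk leaves only the template declaration visible: the constructor definition stays out of line, so both variants must be instantiated in the .cpp or other translation units would fail to link. A minimal single-file sketch of that declaration/definition/instantiation split, with hypothetical names:

// builder.h (assumed name) -- declaration only, mirroring the models.h hunk above.
template <bool iswa>
struct builder {
    explicit builder(int n_layer);
};

// builder.cpp -- the definition lives here, followed by explicit instantiations.
template <bool iswa>
builder<iswa>::builder(int n_layer) {
    (void) n_layer; // graph construction would go here
}

template struct builder<false>; // emits builder<false>::builder for the linker
template struct builder<true>;  // emits builder<true>::builder for the linker

int main() {
    builder<true>  a(12);
    builder<false> b(12);
}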