@fugood/llama.node 1.4.6 → 1.4.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +8 -8
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +36 -18
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +1 -1
- package/src/llama.cpp/common/chat-parser.cpp +3 -2
- package/src/llama.cpp/common/chat.cpp +132 -0
- package/src/llama.cpp/common/console.cpp +582 -29
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +9 -0
- package/src/llama.cpp/src/CMakeLists.txt +2 -1
- package/src/llama.cpp/src/llama-context.cpp +6 -6
- package/src/llama.cpp/src/llama-context.h +1 -1
- package/src/llama.cpp/src/llama-grammar.cpp +233 -33
- package/src/llama.cpp/src/llama-grammar.h +20 -1
- package/src/llama.cpp/src/llama-graph.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +20 -8
- package/src/llama.cpp/src/models/{gemma3-iswa.cpp → gemma3.cpp} +30 -5
- package/src/llama.cpp/src/models/models.h +3 -2
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#include "models.h"
|
|
2
2
|
|
|
3
|
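-
llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {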
|
|
3
|
+
template <bool iswa>
|
|
4
|
+
llm_build_gemma3<iswa>::llm_build_gemma3(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
|
|
4
5
|
const int64_t n_embd_head = hparams.n_embd_head_k;
|
|
5
6
|
|
|
6
7
|
ggml_tensor * cur;
|
|
@@ -17,13 +18,28 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
|
|
|
17
18
|
ggml_tensor * inp_pos = build_inp_pos();
|
|
18
19
|
|
|
19
20
|
// TODO: is causal == true correct? might need some changes
|
|
20
|
-
|
|
21
|
+
using inp_attn_type = std::conditional_t<iswa, llm_graph_input_attn_kv_iswa, llm_graph_input_attn_kv>;
|
|
22
|
+
inp_attn_type * inp_attn = nullptr;
|
|
23
|
+
|
|
24
|
+
if constexpr (iswa) {
|
|
25
|
+
inp_attn = build_attn_inp_kv_iswa();
|
|
26
|
+
} else {
|
|
27
|
+
inp_attn = build_attn_inp_kv();
|
|
28
|
+
}
|
|
21
29
|
|
|
22
30
|
ggml_tensor * inp_out_ids = build_inp_out_ids();
|
|
23
31
|
|
|
24
32
|
for (int il = 0; il < n_layer; ++il) {
|
|
25
|
-
|
|
26
|
-
|
|
33
|
+
float freq_base_l = 0.0f;
|
|
34
|
+
float freq_scale_l = 0.0f;
|
|
35
|
+
|
|
36
|
+
if constexpr (iswa) {
|
|
37
|
+
freq_base_l = model.get_rope_freq_base (cparams, il);
|
|
38
|
+
freq_scale_l = model.get_rope_freq_scale(cparams, il);
|
|
39
|
+
} else {
|
|
40
|
+
freq_base_l = freq_base;
|
|
41
|
+
freq_scale_l = freq_scale;
|
|
42
|
+
}
|
|
27
43
|
|
|
28
44
|
// norm
|
|
29
45
|
cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
|
|
@@ -102,7 +118,7 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
|
|
|
102
118
|
cur = build_norm(cur,
|
|
103
119
|
model.layers[il].ffn_post_norm, NULL,
|
|
104
120
|
LLM_NORM_RMS, -1);
|
|
105
|
-
cb(cur, "ffn_post_norm", -1);
|
|
121
|
+
cb(cur, "ffn_post_norm", il);
|
|
106
122
|
|
|
107
123
|
cur = ggml_add(ctx0, cur, sa_out);
|
|
108
124
|
|
|
@@ -124,8 +140,17 @@ llm_build_gemma3_iswa::llm_build_gemma3_iswa(const llama_model & model, const ll
|
|
|
124
140
|
// lm_head
|
|
125
141
|
cur = build_lora_mm(model.output, cur);
|
|
126
142
|
|
|
143
|
+
if (hparams.f_final_logit_softcapping) {
|
|
144
|
+
cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_final_logit_softcapping);
|
|
145
|
+
cur = ggml_tanh(ctx0, cur);
|
|
146
|
+
cur = ggml_scale(ctx0, cur, hparams.f_final_logit_softcapping);
|
|
147
|
+
}
|
|
148
|
+
|
|
127
149
|
cb(cur, "result_output", -1);
|
|
128
150
|
res->t_logits = cur;
|
|
129
151
|
|
|
130
152
|
ggml_build_forward_expand(gf, cur);
|
|
131
153
|
}
|
|
154
|
+
|
|
155
|
+
template struct llm_build_gemma3<false>;
|
|
156
|
+
template struct llm_build_gemma3<true>;
|
|
@@ -179,8 +179,9 @@ struct llm_build_gemma2_iswa : public llm_graph_context {
|
|
|
179
179
|
llm_build_gemma2_iswa(const llama_model & model, const llm_graph_params & params);
|
|
180
180
|
};
|
|
181
181
|
|
|
182
|
-
|
|
183
|
-
|
|
182
|
+
template <bool iswa>
|
|
183
|
+
struct llm_build_gemma3 : public llm_graph_context {
|
|
184
|
+
llm_build_gemma3(const llama_model & model, const llm_graph_params & params);
|
|
184
185
|
};
|
|
185
186
|
|
|
186
187
|
struct llm_build_gemma3n_iswa : public llm_graph_context {
|