@fugood/llama.node 1.4.11 → 1.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +30 -30
- package/src/llama.cpp/common/arg.cpp +29 -14
- package/src/llama.cpp/common/arg.h +1 -0
- package/src/llama.cpp/common/chat-parser.cpp +11 -0
- package/src/llama.cpp/common/chat.cpp +32 -3
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +23 -23
- package/src/llama.cpp/common/common.h +1 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +13 -1
- package/src/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +23 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +86 -25
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +15 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +768 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +0 -4
- package/src/llama.cpp/include/llama.h +13 -4
- package/src/llama.cpp/src/CMakeLists.txt +4 -0
- package/src/llama.cpp/src/llama-adapter.cpp +12 -3
- package/src/llama.cpp/src/llama-adapter.h +7 -1
- package/src/llama.cpp/src/llama-arch.cpp +76 -0
- package/src/llama.cpp/src/llama-arch.h +7 -0
- package/src/llama.cpp/src/llama-chat.cpp +11 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +22 -21
- package/src/llama.cpp/src/llama-hparams.h +4 -3
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +11 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +23 -0
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model.cpp +287 -16
- package/src/llama.cpp/src/llama-model.h +13 -2
- package/src/llama.cpp/src/llama-sampling.cpp +44 -33
- package/src/llama.cpp/src/llama-sampling.h +3 -0
- package/src/llama.cpp/src/llama-vocab.cpp +101 -33
- package/src/llama.cpp/src/llama-vocab.h +2 -0
- package/src/llama.cpp/src/llama.cpp +52 -37
- package/src/llama.cpp/src/models/bert.cpp +4 -2
- package/src/llama.cpp/src/models/cogvlm.cpp +5 -3
- package/src/llama.cpp/src/models/deepseek2.cpp +1 -1
- package/src/llama.cpp/src/models/gemma-embedding.cpp +2 -6
- package/src/llama.cpp/src/models/gemma3.cpp +3 -4
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +4 -7
- package/src/llama.cpp/src/models/llama.cpp +19 -6
- package/src/llama.cpp/src/models/maincoder.cpp +117 -0
- package/src/llama.cpp/src/models/mimo2-iswa.cpp +123 -0
- package/src/llama.cpp/src/models/models.h +18 -0
- package/src/llama.cpp/src/models/modern-bert.cpp +115 -0
- package/src/llama.cpp/src/models/plamo3.cpp +128 -0
- package/src/llama.cpp/src/unicode.cpp +23 -14
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#include "models.h"
|
|
2
|
+
|
|
3
|
+
template <bool iswa>
|
|
4
|
+
llm_build_plamo3<iswa>::llm_build_plamo3(const llama_model & model, const llm_graph_params & params) :
|
|
5
|
+
llm_graph_context(params) {
|
|
6
|
+
const int64_t head_dim_q = hparams.n_embd_head_k;
|
|
7
|
+
const int64_t head_dim_v = hparams.n_embd_head_v;
|
|
8
|
+
|
|
9
|
+
ggml_tensor * cur;
|
|
10
|
+
ggml_tensor * inpL = build_inp_embd(model.tok_embd);
|
|
11
|
+
ggml_tensor * inp_pos = build_inp_pos();
|
|
12
|
+
|
|
13
|
+
using inp_attn_type = std::conditional_t<iswa, llm_graph_input_attn_kv_iswa, llm_graph_input_attn_kv>;
|
|
14
|
+
inp_attn_type * inp_attn = nullptr;
|
|
15
|
+
|
|
16
|
+
if constexpr (iswa) {
|
|
17
|
+
inp_attn = build_attn_inp_kv_iswa();
|
|
18
|
+
} else {
|
|
19
|
+
inp_attn = build_attn_inp_kv();
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
ggml_tensor * inp_out_ids = build_inp_out_ids();
|
|
23
|
+
|
|
24
|
+
for (int il = 0; il < n_layer; ++il) {
|
|
25
|
+
ggml_tensor * residual = inpL;
|
|
26
|
+
|
|
27
|
+
float freq_base_l = 0.0f;
|
|
28
|
+
float freq_scale_l = 0.0f;
|
|
29
|
+
if constexpr (iswa) {
|
|
30
|
+
freq_base_l = model.get_rope_freq_base (cparams, il);
|
|
31
|
+
freq_scale_l = model.get_rope_freq_scale(cparams, il);
|
|
32
|
+
} else {
|
|
33
|
+
freq_base_l = freq_base;
|
|
34
|
+
freq_scale_l = freq_scale;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
|
|
38
|
+
cb(cur, "attn_norm", il);
|
|
39
|
+
|
|
40
|
+
ggml_tensor * qkv = build_lora_mm(model.layers[il].wqkv, cur);
|
|
41
|
+
cb(cur, "wqkv", il);
|
|
42
|
+
|
|
43
|
+
const int32_t n_head = hparams.n_head(il);
|
|
44
|
+
const int32_t n_head_kv = hparams.n_head_kv(il);
|
|
45
|
+
|
|
46
|
+
const int64_t q_offset = 0;
|
|
47
|
+
const int64_t k_offset = head_dim_q * n_head;
|
|
48
|
+
const int64_t v_offset = k_offset + head_dim_q * n_head_kv;
|
|
49
|
+
|
|
50
|
+
ggml_tensor * Qcur = ggml_view_3d(ctx0, qkv, head_dim_q, n_head, n_tokens,
|
|
51
|
+
head_dim_q * sizeof(float), qkv->nb[1], q_offset * ggml_element_size(qkv));
|
|
52
|
+
ggml_tensor * Kcur = ggml_view_3d(ctx0, qkv, head_dim_q, n_head_kv, n_tokens,
|
|
53
|
+
head_dim_q * sizeof(float), qkv->nb[1], k_offset * ggml_element_size(qkv));
|
|
54
|
+
ggml_tensor * Vcur = ggml_view_3d(ctx0, qkv, head_dim_v, n_head_kv, n_tokens,
|
|
55
|
+
head_dim_v * sizeof(float), qkv->nb[1], v_offset * ggml_element_size(qkv));
|
|
56
|
+
|
|
57
|
+
cb(Qcur, "Qcur", il);
|
|
58
|
+
cb(Kcur, "Kcur", il);
|
|
59
|
+
cb(Vcur, "Vcur", il);
|
|
60
|
+
|
|
61
|
+
Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il);
|
|
62
|
+
cb(Qcur, "attn_q_norm", il);
|
|
63
|
+
Kcur = build_norm(Kcur, model.layers[il].attn_k_norm, NULL, LLM_NORM_RMS, il);
|
|
64
|
+
cb(Kcur, "attn_k_norm", il);
|
|
65
|
+
|
|
66
|
+
Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr,
|
|
67
|
+
n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
|
|
68
|
+
ext_factor, attn_factor, beta_fast, beta_slow);
|
|
69
|
+
Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr,
|
|
70
|
+
n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
|
|
71
|
+
ext_factor, attn_factor, beta_fast, beta_slow);
|
|
72
|
+
|
|
73
|
+
const float attn_scale = 1.0f / sqrtf(float(head_dim_q));
|
|
74
|
+
|
|
75
|
+
cur = build_attn(inp_attn,
|
|
76
|
+
model.layers[il].wo, NULL,
|
|
77
|
+
Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, attn_scale, il);
|
|
78
|
+
cb(cur, "attn_out", il);
|
|
79
|
+
|
|
80
|
+
if (il == n_layer - 1 && inp_out_ids) {
|
|
81
|
+
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
|
|
82
|
+
residual = ggml_get_rows(ctx0, residual, inp_out_ids);
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
cur = build_norm(cur, model.layers[il].attn_post_norm, NULL, LLM_NORM_RMS, il);
|
|
86
|
+
cb(cur, "attn_post_norm", il);
|
|
87
|
+
|
|
88
|
+
cur = ggml_add(ctx0, cur, residual);
|
|
89
|
+
cb(cur, "attn_residual", il);
|
|
90
|
+
|
|
91
|
+
residual = cur;
|
|
92
|
+
|
|
93
|
+
cur = build_norm(cur, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
|
|
94
|
+
cb(cur, "ffn_norm", il);
|
|
95
|
+
|
|
96
|
+
cur = build_ffn(cur,
|
|
97
|
+
model.layers[il].ffn_up, NULL, NULL,
|
|
98
|
+
NULL, NULL, NULL,
|
|
99
|
+
model.layers[il].ffn_down, NULL, NULL,
|
|
100
|
+
NULL,
|
|
101
|
+
LLM_FFN_SWIGLU, LLM_FFN_SEQ, il);
|
|
102
|
+
cb(cur, "ffn_out", il);
|
|
103
|
+
|
|
104
|
+
cur = build_norm(cur, model.layers[il].ffn_post_norm, NULL, LLM_NORM_RMS, il);
|
|
105
|
+
cb(cur, "ffn_post_norm", il);
|
|
106
|
+
|
|
107
|
+
cur = ggml_add(ctx0, cur, residual);
|
|
108
|
+
cb(cur, "ffn_residual", il);
|
|
109
|
+
|
|
110
|
+
cur = build_cvec(cur, il);
|
|
111
|
+
cb(cur, "l_out", il);
|
|
112
|
+
inpL = cur;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
cur = inpL;
|
|
116
|
+
|
|
117
|
+
cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1);
|
|
118
|
+
res->t_embd = cur;
|
|
119
|
+
|
|
120
|
+
cur = build_lora_mm(model.output, cur);
|
|
121
|
+
res->t_logits = cur;
|
|
122
|
+
|
|
123
|
+
ggml_build_forward_expand(gf, cur);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Explicit template instantiations.
// The llm_build_plamo3 constructor template is defined in this source file,
// so both values of `iswa` are instantiated here.
// NOTE(review): presumably required so other translation units that only see
// the declaration in models.h can link against either variant - confirm.
|
|
127
|
+
template struct llm_build_plamo3<false>;
|
|
128
|
+
template struct llm_build_plamo3<true>;
|
|
@@ -964,6 +964,11 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
|
964
964
|
{ "\\p{P}", unicode_cpt_flags::PUNCTUATION },
|
|
965
965
|
{ "\\p{M}", unicode_cpt_flags::ACCENT_MARK },
|
|
966
966
|
{ "\\p{S}", unicode_cpt_flags::SYMBOL },
|
|
967
|
+
{ "\\p{Lu}", unicode_cpt_flags::LETTER }, // Uppercase letter
|
|
968
|
+
{ "\\p{Ll}", unicode_cpt_flags::LETTER }, // Lowercase letter
|
|
969
|
+
{ "\\p{Lt}", unicode_cpt_flags::LETTER }, // Titlecase letter
|
|
970
|
+
{ "\\p{Lm}", unicode_cpt_flags::LETTER }, // Modifier letter
|
|
971
|
+
{ "\\p{Lo}", unicode_cpt_flags::LETTER }, // Other letter
|
|
967
972
|
};
|
|
968
973
|
|
|
969
974
|
static const std::map<int, int> k_ucat_cpt = {
|
|
@@ -1074,22 +1079,26 @@ std::vector<std::string> unicode_regex_split(const std::string & text, const std
|
|
|
1074
1079
|
continue;
|
|
1075
1080
|
}
|
|
1076
1081
|
|
|
1077
|
-
|
|
1082
|
+
// Match \p{...} Unicode properties of varying lengths
|
|
1083
|
+
if (regex_expr[i + 0] == '\\' && i + 3 < regex_expr.size() &&
|
|
1078
1084
|
regex_expr[i + 1] == 'p' &&
|
|
1079
|
-
regex_expr[i + 2] == '{'
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
if (
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
+
regex_expr[i + 2] == '{') {
|
|
1086
|
+
// Find the closing brace
|
|
1087
|
+
size_t closing_brace = regex_expr.find('}', i + 3);
|
|
1088
|
+
if (closing_brace != std::string::npos && closing_brace <= i + 10) { // reasonable limit
|
|
1089
|
+
const std::string pat = regex_expr.substr(i, closing_brace - i + 1);
|
|
1090
|
+
if (k_ucat_enum.find(pat) != k_ucat_enum.end()) {
|
|
1091
|
+
if (!inside) {
|
|
1092
|
+
regex_expr_collapsed += '[';
|
|
1093
|
+
}
|
|
1094
|
+
regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat));
|
|
1095
|
+
regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat));
|
|
1096
|
+
if (!inside) {
|
|
1097
|
+
regex_expr_collapsed += ']';
|
|
1098
|
+
}
|
|
1099
|
+
i = closing_brace;
|
|
1100
|
+
continue;
|
|
1085
1101
|
}
|
|
1086
|
-
regex_expr_collapsed += k_ucat_cpt.at(k_ucat_enum.at(pat));
|
|
1087
|
-
regex_expr_collapsed += k_ucat_map.at(k_ucat_enum.at(pat));
|
|
1088
|
-
if (!inside) {
|
|
1089
|
-
regex_expr_collapsed += ']';
|
|
1090
|
-
}
|
|
1091
|
-
i += 4;
|
|
1092
|
-
continue;
|
|
1093
1102
|
}
|
|
1094
1103
|
}
|
|
1095
1104
|
|