@fugood/llama.node 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +13 -0
- package/lib/index.js +3 -0
- package/lib/index.ts +6 -0
- package/package.json +14 -14
- package/src/LlamaCompletionWorker.cpp +3 -2
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +50 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +71 -596
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +14 -286
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +90 -569
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +103 -596
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +55 -341
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +3 -58
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +62 -305
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +54 -314
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +94 -673
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +16 -249
- package/src/llama.cpp/src/llama-arch.cpp +22 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +2 -2
- package/src/llama.cpp/src/llama-graph.cpp +94 -0
- package/src/llama.cpp/src/llama-graph.h +12 -0
- package/src/llama.cpp/src/llama-hparams.cpp +9 -3
- package/src/llama.cpp/src/llama-hparams.h +11 -4
- package/src/llama.cpp/src/llama-model.cpp +195 -8
- package/src/tts_utils.h +3 -3
package/src/llama.cpp/src/llama-model.cpp
CHANGED

@@ -1369,7 +1369,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);

                 switch (hparams.n_layer) {
                     case 27: type = LLM_TYPE_16B; break;
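Note: in `llama_model_loader::get_key`, the third argument is the `required` flag (default `true`). Passing `false` makes `rope_scaling.yarn_log_multiplier` optional, so GGUF files that omit the key keep the default value instead of aborting the load. A minimal sketch of the pattern, using a hypothetical stand-in for the loader (not the real `llama_model_loader` code):

    #include <map>
    #include <stdexcept>
    #include <string>

    // Hypothetical stand-in for llama_model_loader::get_key: returns whether
    // the key was present; throws only when the key is missing AND required.
    bool get_key(const std::map<std::string, float> & kv,
                 const std::string & key, float & dst, bool required = true) {
        const auto it = kv.find(key);
        if (it == kv.end()) {
            if (required) {
                throw std::runtime_error("key not found in model: " + key);
            }
            return false; // optional key: dst keeps its default
        }
        dst = it->second;
        return true;
    }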
@@ -1768,6 +1768,29 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_SMALLTHINKER:
+            {
+                const bool found_swa = ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
+
+                if (found_swa && hparams.n_swa > 0) {
+                    hparams.swa_type = LLAMA_SWA_TYPE_STANDARD;
+                    hparams.n_swa = 4096;
+                    hparams.set_swa_pattern(4, true);
+                } else {
+                    hparams.swa_type = LLAMA_SWA_TYPE_NONE;
+                    hparams.n_no_rope_layer_step = hparams.n_layer;
+                }
+
+                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false);
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
+
+                switch (hparams.n_layer) {
+                    case 32: type = LLM_TYPE_4B; break;
+                    case 52: type = LLM_TYPE_20B; break;
+                    default: type = LLM_TYPE_UNKNOWN;
+                }
+            } break;
         default: throw std::runtime_error("unsupported model architecture");
     }

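Note: when the GGUF `sliding_window` key is present and non-zero, the loader forces a 4096-token window and calls `set_swa_pattern(4, true)`, i.e. an interleaved layout in which one layer in every group of four keeps full attention. A sketch of the layer layout this implies; the dense-first reading (layers 0, 4, 8, ... stay dense) is an assumption drawn from the flag name, and 32 layers is just an illustrative depth:

    #include <cstdio>

    int main() {
        const int n_layer = 32, n_pattern = 4;
        for (int il = 0; il < n_layer; ++il) {
            // assumed dense-first layout: first layer of each group of 4 is dense
            const bool is_swa = (il % n_pattern) != 0;
            std::printf("layer %2d: %s\n", il, is_swa ? "swa (window 4096)" : "full attention");
        }
        return 0;
    }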
@@ -5165,6 +5188,42 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     }
                 }
             } break;
+        case LLM_ARCH_SMALLTHINKER:
+            {
+                tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), { n_embd, n_vocab }, 0);
+
+                // output
+                output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), { n_embd }, 0);
+                output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                // if output is NULL, init from the input tok embed
+                if (output == NULL) {
+                    output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                }
+
+                for (int i = 0; i < n_layer; ++i) {
+                    auto & layer = layers[i];
+
+                    layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd }, 0);
+
+                    layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), { n_embd, n_embd_head_k * n_head }, 0);
+                    layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), { n_embd, n_embd_gqa }, 0);
+                    layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), { n_embd, n_embd_gqa }, 0);
+                    layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, 0);
+
+                    layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), { n_embd }, 0);
+
+                    GGML_ASSERT(n_expert > 0 && "n_expert must be > 0 for SMALLTHINKER");
+                    GGML_ASSERT(n_expert_used > 0 && "n_expert_used must be > 0 for SMALLTHINKER");
+
+                    // MoE branch
+                    const int64_t n_ff_exp = hparams.n_ff_exp;
+                    layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), { n_embd, n_expert }, 0);
+                    layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert }, 0);
+                    layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff_exp, n_embd, n_expert }, 0);
+                    layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert }, 0);
+                }
+            } break;
         default:
             throw std::runtime_error("unknown architecture");
     }
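Note: the per-layer expert tensors are 3D, with the expert index as the last dimension (`{n_embd, n_ff_exp, n_expert}` for gate/up, `{n_ff_exp, n_embd, n_expert}` for down), so all experts of a layer live in a single tensor. A quick parameter-count sketch; the concrete sizes below are illustrative assumptions, not SmallThinker's actual hyperparameters:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t n_embd = 2048, n_ff_exp = 768, n_expert = 32;
        const int64_t router     = n_embd * n_expert;     // ffn_gate_inp: [n_embd, n_expert]
        const int64_t per_expert = 3 * n_embd * n_ff_exp; // gate + up + down matrices
        std::printf("router %lld + experts %lld = %lld params/layer\n",
                    (long long) router,
                    (long long) (per_expert * n_expert),
                    (long long) (router + per_expert * n_expert));
        return 0;
    }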
@@ -5490,6 +5549,11 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: expert_weights_norm = %d\n", __func__, hparams.expert_weights_norm);
     }

+    if (arch == LLM_ARCH_SMALLTHINKER) {
+        LLAMA_LOG_INFO("%s: n_ff_exp = %d\n", __func__, hparams.n_ff_exp);
+        LLAMA_LOG_INFO("%s: expert_gating_func = %s\n", __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
+    }
+
     vocab.print_info();
 }

@@ -16191,7 +16255,7 @@ private:
         {
             // PLaMo-2 uses combined QKV tensor
             ggml_tensor * qkv = build_lora_mm(model.layers[il].wqkv, cur);
-            cb(qkv, "
+            cb(qkv, "wqkv", il);

             // split QKV tensor into Q, K, V
             const int64_t n_embd_head_q = hparams.n_embd_head_k;
@@ -16231,7 +16295,7 @@ private:
                 ext_factor, attn_factor, beta_fast, beta_slow
             );

-            cur = build_attn(inp, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f, il);
+            cur = build_attn(inp, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f/sqrtf(float(n_embd_head_v)), il);
         }

         cb(cur, "attn_out", il);
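Note: the old call passed a `kq_scale` of `1.0f`, i.e. unscaled attention logits; the fix applies the standard scaled-dot-product factor 1/sqrt(d_head), which keeps the variance of the QK^T logits independent of the head size. For instance, with a head size of 128 (an illustrative value) the factor is 1/sqrt(128) ≈ 0.088.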
@@ -16306,8 +16370,9 @@ private:
         ggml_build_forward_expand(gf,
             ggml_cpy(ctx0, last_conv,
                 ggml_view_1d(ctx0, conv_states_all,
-                    (d_conv - 1)*(d_inner)*(n_seqs),
-                    kv_head*(d_conv - 1)*(d_inner)*ggml_element_size(conv_states_all))));
+                    (d_conv - 1)*(d_inner + 2*n_group*d_state)*(n_seqs),
+                    kv_head*(d_conv - 1)*(d_inner + 2*n_group*d_state)*ggml_element_size(conv_states_all))));
+        cb(conv_states_all, "mamba_conv1d_state", il);

         // 1D convolution
         x = ggml_ssm_conv(ctx0, conv_x, model.layers[il].ssm_conv1d);
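Note: the convolution state caches the last `d_conv - 1` columns not just of the inner activations but also of the B and C state streams, so each sequence's cache row holds `(d_conv - 1) * (d_inner + 2*n_group*d_state)` elements; the old view dropped the `2*n_group*d_state` term and wrote back a truncated state. A worked size check with assumed dimensions (not the model's real values):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t d_conv = 4, d_inner = 4096, n_group = 1, d_state = 128;
        const int64_t old_row = (d_conv - 1) * d_inner;                           // 12288
        const int64_t new_row = (d_conv - 1) * (d_inner + 2 * n_group * d_state); // 13056
        std::printf("old %lld vs fixed %lld: %lld elements/sequence were dropped\n",
                    (long long) old_row, (long long) new_row,
                    (long long) (new_row - old_row));
        return 0;
    }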
@@ -16370,9 +16435,9 @@ private:
         // store last states
         ggml_build_forward_expand(gf,
             ggml_cpy(ctx0,
-                ggml_view_1d(ctx0, y_ssm, d_state*
-                ggml_view_1d(ctx0, ssm_states_all, d_state*
-
+                ggml_view_1d(ctx0, y_ssm, n_heads*head_dim*d_state*n_seqs, n_heads*head_dim*n_seq_tokens*n_seqs*ggml_element_size(y_ssm)),
+                ggml_view_1d(ctx0, ssm_states_all, n_heads*head_dim*d_state*n_seqs, kv_head*n_seqs*n_heads*head_dim*d_state*ggml_element_size(ssm_states_all))));
+        cb(ssm_states_all, "mamba_ssm_states", il);

         ggml_tensor * y = ggml_view_4d(ctx0, y_ssm, head_dim, n_heads, n_seq_tokens, n_seqs, head_dim * ggml_element_size(x), head_dim * n_heads * ggml_element_size(x), head_dim * n_heads * n_seq_tokens * ggml_element_size(x), 0);
         cb(y, "mamba_y_view", il);
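Note: `y_ssm` packs the per-token outputs followed by the final recurrent states, so the fixed source view skips `n_heads*head_dim*n_seq_tokens*n_seqs` output elements before copying `n_heads*head_dim*d_state*n_seqs` state elements into the cache slot for `kv_head`. A sketch mirroring the offset arithmetic of this hunk, with assumed dimensions:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t n_heads = 32, head_dim = 64, d_state = 64;
        const int64_t n_seq_tokens = 8, n_seqs = 2, kv_head = 3;
        const int64_t n_copy  = n_heads*head_dim*d_state*n_seqs;          // state elements to copy
        const int64_t src_off = n_heads*head_dim*n_seq_tokens*n_seqs;     // skip the y outputs
        const int64_t dst_off = kv_head*n_seqs*n_heads*head_dim*d_state;  // cache slot offset (as in the hunk)
        std::printf("copy %lld elements: src +%lld -> dst +%lld\n",
                    (long long) n_copy, (long long) src_off, (long long) dst_off);
        return 0;
    }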
@@ -17010,6 +17075,119 @@ struct llm_build_lfm2 : public llm_graph_context {
     }
 };

+template <bool iswa>
+struct llm_build_smallthinker : public llm_graph_context{
+    llm_build_smallthinker(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params){
+        const int64_t n_embd_head = hparams.n_embd_head_v;
+
+        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+        GGML_ASSERT(n_embd_head == hparams.n_rot);
+
+        ggml_tensor * cur;
+        ggml_tensor * inpL;
+
+        inpL = build_inp_embd(model.tok_embd);
+
+        // inp_pos - contains the positions
+        ggml_tensor * inp_pos = build_inp_pos();
+
+        using inp_attn_type = std::conditional_t<iswa, llm_graph_input_attn_kv_unified_iswa, llm_graph_input_attn_kv_unified>;
+        inp_attn_type * inp_attn = nullptr;
+
+        if constexpr (iswa) {
+            inp_attn = build_attn_inp_kv_unified_iswa();
+        } else {
+            inp_attn = build_attn_inp_kv_unified();
+        }
+
+        ggml_tensor * inp_out_ids = build_inp_out_ids();
+
+        for (int il = 0; il < n_layer; ++il) {
+            ggml_tensor * inpSA = inpL;
+            ggml_tensor * probs = nullptr;
+
+            probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
+            cb(probs, "ffn_moe_logits", il);
+
+            // norm
+            cur = build_norm(inpL,model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
+            cb(cur, "attn_norm", il);
+
+            // self_attention
+            {
+                // compute Q and K and RoPE them
+                struct ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
+                cb(Qcur, "Qcur", il);
+
+                struct ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
+                cb(Kcur, "Kcur", il);
+
+                struct ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
+                cb(Vcur, "Vcur", il);
+
+                Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
+                Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
+                Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
+
+                if (hparams.n_no_rope_layer_step == n_layer || il % hparams.n_no_rope_layer_step != 0) {
+                    Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                                         ext_factor, attn_factor, beta_fast, beta_slow);
+
+                    Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                                         ext_factor, attn_factor, beta_fast, beta_slow);
+                }
+
+                cb(Qcur, "Qcur", il);
+                cb(Kcur, "Kcur", il);
+
+                cur = build_attn(inp_attn,
+                        model.layers[il].wo, model.layers[il].bo,
+                        Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
+            }
+
+            if (il == n_layer - 1 && inp_out_ids) {
+                cur = ggml_get_rows(ctx0, cur, inp_out_ids);
+                inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
+                probs = ggml_get_rows(ctx0, probs, inp_out_ids);
+            }
+
+            ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
+            cb(ffn_inp, "ffn_inp", il);
+
+            // MoE branch
+            cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
+            cb(cur, "ffn_norm", il);
+
+            ggml_tensor * ffn_out = build_moe_ffn_from_probs(cur, probs, model.layers[il].ffn_up_exps,
+                                        model.layers[il].ffn_gate_exps, model.layers[il].ffn_down_exps,
+                                        nullptr, n_expert, n_expert_used,
+                                        static_cast<llama_expert_gating_func_type>(hparams.expert_gating_func), il);
+
+            cb(ffn_out, "ffn_out", il);
+            cur = ffn_out;
+
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = build_cvec(cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
+        }
+
+        cur = inpL;
+
+        cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1);
+        cb(cur, "result_norm", -1);
+
+        // lm_head
+        cur = build_lora_mm(model.output, cur);
+        cb(cur, "result_output", -1);
+        res->t_logits = cur;
+
+        ggml_build_forward_expand(gf, cur);
+    }
+};
+
 llama_memory_i * llama_model::create_memory(const llama_memory_params & params, llama_cparams & cparams) const {
     llama_memory_i * res;

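Note: inside `llm_build_smallthinker`, RoPE is skipped on every layer whose index is a multiple of `n_no_rope_layer_step`; when SWA is disabled the loader sets the step to `n_layer`, and the first clause of the condition then enables RoPE on all layers. A sketch of the resulting layer split, assuming a step of 4 (illustrative; the actual step comes from the model's hparams):

    #include <cstdio>

    int main() {
        const int n_layer = 32, n_no_rope_layer_step = 4;
        for (int il = 0; il < n_layer; ++il) {
            // mirrors the condition in llm_build_smallthinker
            const bool rope = n_no_rope_layer_step == n_layer ||
                              il % n_no_rope_layer_step != 0;
            std::printf("layer %2d: %s\n", il, rope ? "rope" : "no rope");
        }
        return 0;
    }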
@@ -17448,6 +17626,14 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
             {
                 llm = std::make_unique<llm_build_lfm2>(*this, params);
             } break;
+        case LLM_ARCH_SMALLTHINKER:
+            {
+                if (hparams.swa_type == LLAMA_SWA_TYPE_STANDARD) {
+                    llm = std::make_unique<llm_build_smallthinker<true>> (*this, params);
+                } else {
+                    llm = std::make_unique<llm_build_smallthinker<false>>(*this, params);
+                }
+            } break;
         default:
             GGML_ABORT("fatal error");
     }
@@ -17646,6 +17832,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_DOTS1:
         case LLM_ARCH_HUNYUAN_MOE:
         case LLM_ARCH_LFM2:
+        case LLM_ARCH_SMALLTHINKER:
             return LLAMA_ROPE_TYPE_NEOX;

         case LLM_ARCH_QWEN2VL:

package/src/tts_utils.h
CHANGED
@@ -68,7 +68,7 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
 static const char *OUTETTS_V1_GRAMMAR = R"(
 root ::= NL? wordAudioBlock+ audioEnd NL eos?
 wordAudioBlock ::= WORD codeBlock NL
-codeBlock ::= TIME CODE
+codeBlock ::= TIME CODE*
 eos ::= "<|im_end|>"
 codeStart ::= "<|code_start|>"
 codeEnd ::= "<|code_end|>"
@@ -85,7 +85,7 @@ static const char *OUTETTS_V2_GRAMMAR = R"(
 root ::= NL? content+ audioEnd NL eos?
 content ::= wordAudioBlock | emotionBlock
 wordAudioBlock ::= WORD punch* codeBlock space NL
-codeBlock ::= TIME CODE
+codeBlock ::= TIME CODE*
 emotionBlock ::= emotionStart TEXT emotionEnd space NL
 TEXT ::= [A-Za-z0-9 .,?!]+
 eos ::= "<|im_end|>"
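Note: in GBNF, `*` denotes zero-or-more repetition, so `codeBlock ::= TIME CODE*` lets both OuteTTS grammars accept a time token followed by any number of audio-code tokens (e.g. `<|t_0.56|><|634|><|596|>`), whereas the old rule `TIME CODE` required exactly one code token per block.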
@@ -94,7 +94,7 @@ emotionEnd ::= "<|emotion_end|>"
 audioEnd ::= "<|audio_end|>"
 space ::= "<|space|>"
 WORD ::= [A-Za-z]+
-NL ::=
+NL ::= [\n]
 TIME ::= "<|t_" DECIMAL "|>"
 CODE ::= "<|" DIGITS "|>"
 DIGITS ::= [0-9]+