@fugood/llama.node 0.3.14 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/llama.cpp/.github/workflows/build.yml +30 -1
- package/src/llama.cpp/CMakeLists.txt +9 -1
- package/src/llama.cpp/cmake/common.cmake +2 -0
- package/src/llama.cpp/common/arg.cpp +20 -2
- package/src/llama.cpp/common/common.cpp +6 -3
- package/src/llama.cpp/common/speculative.cpp +4 -4
- package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +1 -1
- package/src/llama.cpp/examples/embedding/embedding.cpp +1 -1
- package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +1 -1
- package/src/llama.cpp/examples/infill/infill.cpp +2 -2
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +4 -4
- package/src/llama.cpp/examples/llava/gemma3-cli.cpp +1 -1
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -6
- package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
- package/src/llama.cpp/examples/main/main.cpp +6 -6
- package/src/llama.cpp/examples/parallel/parallel.cpp +5 -5
- package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
- package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
- package/src/llama.cpp/examples/run/run.cpp +91 -46
- package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
- package/src/llama.cpp/examples/server/server.cpp +32 -15
- package/src/llama.cpp/examples/server/utils.hpp +3 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
- package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
- package/src/llama.cpp/examples/tts/tts.cpp +12 -9
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
- package/src/llama.cpp/ggml/include/ggml.h +24 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +5 -27
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +150 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +253 -2
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +7 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +95 -22
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +66 -26
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +12 -13
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
- package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -2
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +103 -34
- package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +19 -20
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
- package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +352 -146
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +3 -0
- package/src/llama.cpp/ggml/src/ggml.c +85 -2
- package/src/llama.cpp/include/llama.h +86 -22
- package/src/llama.cpp/src/CMakeLists.txt +5 -2
- package/src/llama.cpp/src/llama-adapter.cpp +19 -20
- package/src/llama.cpp/src/llama-adapter.h +11 -9
- package/src/llama.cpp/src/llama-arch.cpp +102 -16
- package/src/llama.cpp/src/llama-arch.h +18 -0
- package/src/llama.cpp/src/llama-batch.h +2 -2
- package/src/llama.cpp/src/llama-context.cpp +2253 -1222
- package/src/llama.cpp/src/llama-context.h +214 -77
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +1662 -0
- package/src/llama.cpp/src/llama-graph.h +574 -0
- package/src/llama.cpp/src/llama-hparams.cpp +8 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -0
- package/src/llama.cpp/src/llama-io.cpp +15 -0
- package/src/llama.cpp/src/llama-io.h +35 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
- package/src/llama.cpp/src/llama-kv-cache.h +178 -110
- package/src/llama.cpp/src/llama-memory.cpp +1 -0
- package/src/llama.cpp/src/llama-memory.h +21 -0
- package/src/llama.cpp/src/llama-model.cpp +8207 -163
- package/src/llama.cpp/src/llama-model.h +34 -1
- package/src/llama.cpp/src/llama-quant.cpp +10 -1
- package/src/llama.cpp/src/llama.cpp +51 -9984
- package/src/llama.cpp/tests/test-backend-ops.cpp +88 -9
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
- package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
package/src/llama.cpp/src/llama-arch.cpp

@@ -59,6 +59,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_EXAONE, "exaone" },
     { LLM_ARCH_RWKV6, "rwkv6" },
     { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
+    { LLM_ARCH_RWKV7, "rwkv7" },
+    { LLM_ARCH_ARWKV7, "arwkv7" },
     { LLM_ARCH_GRANITE, "granite" },
     { LLM_ARCH_GRANITE_MOE, "granitemoe" },
     { LLM_ARCH_CHAMELEON, "chameleon" },
@@ -110,22 +112,26 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
     { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },
 
-    { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
-    { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
-    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
-    { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
-    { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
-    { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
-    { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
-    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
-    { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
-    { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
-    { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
-    { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
-    { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
-    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
-    { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
-    { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
+    { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
+    { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
+    { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
+    { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
+    { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
+    { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
+    { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
+    { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
+    { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
+    { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
+    { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
+    { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
+    { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
+    { LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
+    { LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
+    { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
+    { LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
+    { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+    { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
+    { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
     { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
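For context, the "%s.*" strings in LLM_KV_NAMES are printf-style templates: the architecture name (e.g. "rwkv7") is substituted for the "%s" to form the concrete GGUF metadata key. A minimal illustrative sketch of that expansion, assuming only what is visible above (the helper name is ours, not llama.cpp's API):

    #include <cstdio>
    #include <string>

    // Illustrative sketch: expand a key template such as
    // "%s.attention.decay_lora_rank" with an arch name such as "rwkv7".
    static std::string expand_kv_key(const char * tmpl, const char * arch_name) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), tmpl, arch_name);
        return std::string(buf); // e.g. "rwkv7.attention.decay_lora_rank"
    }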
@@ -1238,6 +1244,74 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_RWKV7,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+            { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
+            { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
+            { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
+            { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
+            { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
+            { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
+            { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
+            { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
+            { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
+            { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
+            { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
+            { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
+            { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
+            { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
+            { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
+            { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
+            { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
+            { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
+            { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
+            { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
+            { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
+            { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
+            { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
+        },
+    },
+    {
+        LLM_ARCH_ARWKV7,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
+            { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
+            { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
+            { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
+            { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
+            { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
+            { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
+            { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
+            { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
+            { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
+            { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
+            { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
+            { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
+            { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
+            { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
+            { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
+            { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
+            { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
+            { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
+            { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_GRANITE,
         {
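Similarly, the per-layer entries in LLM_TENSOR_NAMES ("blk.%d.time_mix_w0" and so on) are templates keyed first by architecture and then by tensor id; the block index fills the "%d" when concrete tensor names are resolved. A small self-contained sketch of that lookup shape, under the assumption that the enum and map names below are illustrative stand-ins rather than llama.cpp's own:

    #include <cstdio>
    #include <map>

    enum demo_arch   { DEMO_ARCH_RWKV7 };
    enum demo_tensor { DEMO_TENSOR_TIME_MIX_W0 };

    // Miniature stand-in for the nested arch -> tensor -> name-template map.
    static const std::map<demo_arch, std::map<demo_tensor, const char *>> DEMO_TENSOR_NAMES = {
        { DEMO_ARCH_RWKV7, { { DEMO_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" } } },
    };

    int main() {
        char name[128];
        std::snprintf(name, sizeof(name),
                      DEMO_TENSOR_NAMES.at(DEMO_ARCH_RWKV7).at(DEMO_TENSOR_TIME_MIX_W0), 3);
        std::printf("%s\n", name); // prints "blk.3.time_mix_w0"
        return 0;
    }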
@@ -1397,6 +1471,12 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_TIME_MIX_A1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_TIME_MIX_A2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_TIME_MIX_V1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_TIME_MIX_V2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_TIME_MIX_G1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_TIME_MIX_G2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
@@ -1415,6 +1495,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_TIME_MIX_K_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_TIME_MIX_K_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_TIME_MIX_R_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
@@ -1422,6 +1505,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    {LLM_TENSOR_TIME_MIX_W0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    {LLM_TENSOR_TIME_MIX_A0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    {LLM_TENSOR_TIME_MIX_V0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
     {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
     {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
package/src/llama.cpp/src/llama-arch.h

@@ -63,6 +63,8 @@ enum llm_arch {
     LLM_ARCH_EXAONE,
     LLM_ARCH_RWKV6,
     LLM_ARCH_RWKV6QWEN2,
+    LLM_ARCH_RWKV7,
+    LLM_ARCH_ARWKV7,
     LLM_ARCH_GRANITE,
     LLM_ARCH_GRANITE_MOE,
     LLM_ARCH_CHAMELEON,
@@ -127,6 +129,10 @@ enum llm_kv {
     LLM_KV_ATTENTION_CAUSAL,
     LLM_KV_ATTENTION_Q_LORA_RANK,
     LLM_KV_ATTENTION_KV_LORA_RANK,
+    LLM_KV_ATTENTION_DECAY_LORA_RANK,
+    LLM_KV_ATTENTION_ICLR_LORA_RANK,
+    LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
+    LLM_KV_ATTENTION_GATE_LORA_RANK,
     LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
     LLM_KV_ATTENTION_SLIDING_WINDOW,
     LLM_KV_ATTENTION_SCALE,
@@ -250,8 +256,20 @@ enum llm_tensor {
     LLM_TENSOR_SSM_A,
     LLM_TENSOR_SSM_D,
     LLM_TENSOR_SSM_OUT,
+    LLM_TENSOR_TIME_MIX_W0,
     LLM_TENSOR_TIME_MIX_W1,
     LLM_TENSOR_TIME_MIX_W2,
+    LLM_TENSOR_TIME_MIX_A0,
+    LLM_TENSOR_TIME_MIX_A1,
+    LLM_TENSOR_TIME_MIX_A2,
+    LLM_TENSOR_TIME_MIX_V0,
+    LLM_TENSOR_TIME_MIX_V1,
+    LLM_TENSOR_TIME_MIX_V2,
+    LLM_TENSOR_TIME_MIX_G1,
+    LLM_TENSOR_TIME_MIX_G2,
+    LLM_TENSOR_TIME_MIX_K_K,
+    LLM_TENSOR_TIME_MIX_K_A,
+    LLM_TENSOR_TIME_MIX_R_K,
     LLM_TENSOR_TIME_MIX_LERP_X,
     LLM_TENSOR_TIME_MIX_LERP_W,
     LLM_TENSOR_TIME_MIX_LERP_K,
package/src/llama.cpp/src/llama-batch.h

@@ -42,9 +42,9 @@ struct llama_sbatch {
     bool logits_all; // TODO: remove once lctx.logits_all is removed too
 
     // sorted indices into the batch
-    std::vector<size_t> ids;
+    std::vector<int64_t> ids;
     // batch indices of the output
-    std::vector<size_t> out_ids;
+    std::vector<int64_t> out_ids;
     std::vector<llama_sbatch_seq> seq;
 
     const llama_batch * batch = nullptr;