@fugood/llama.node 0.3.14 → 0.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/bin/darwin/arm64/llama-node.node +0 -0
  2. package/bin/darwin/x64/llama-node.node +0 -0
  3. package/bin/linux/arm64/llama-node.node +0 -0
  4. package/bin/linux/x64/llama-node.node +0 -0
  5. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  6. package/bin/linux-cuda/x64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  9. package/bin/win32/arm64/llama-node.node +0 -0
  10. package/bin/win32/arm64/node.lib +0 -0
  11. package/bin/win32/x64/llama-node.node +0 -0
  12. package/bin/win32/x64/node.lib +0 -0
  13. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  14. package/bin/win32-vulkan/arm64/node.lib +0 -0
  15. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  16. package/bin/win32-vulkan/x64/node.lib +0 -0
  17. package/package.json +1 -1
  18. package/src/llama.cpp/.github/workflows/build.yml +30 -1
  19. package/src/llama.cpp/CMakeLists.txt +9 -1
  20. package/src/llama.cpp/cmake/common.cmake +2 -0
  21. package/src/llama.cpp/common/arg.cpp +20 -2
  22. package/src/llama.cpp/common/common.cpp +6 -3
  23. package/src/llama.cpp/common/speculative.cpp +4 -4
  24. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +2 -2
  25. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +1 -1
  26. package/src/llama.cpp/examples/embedding/embedding.cpp +1 -1
  27. package/src/llama.cpp/examples/gritlm/gritlm.cpp +2 -2
  28. package/src/llama.cpp/examples/imatrix/imatrix.cpp +1 -1
  29. package/src/llama.cpp/examples/infill/infill.cpp +2 -2
  30. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +2 -2
  31. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +4 -4
  32. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +1 -1
  33. package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -6
  34. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  35. package/src/llama.cpp/examples/main/main.cpp +6 -6
  36. package/src/llama.cpp/examples/parallel/parallel.cpp +5 -5
  37. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  38. package/src/llama.cpp/examples/perplexity/perplexity.cpp +6 -6
  39. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +2 -2
  40. package/src/llama.cpp/examples/retrieval/retrieval.cpp +1 -1
  41. package/src/llama.cpp/examples/run/run.cpp +91 -46
  42. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +2 -2
  43. package/src/llama.cpp/examples/server/server.cpp +37 -15
  44. package/src/llama.cpp/examples/server/utils.hpp +3 -1
  45. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  46. package/src/llama.cpp/examples/speculative/speculative.cpp +14 -14
  47. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  48. package/src/llama.cpp/examples/tts/tts.cpp +20 -9
  49. package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
  50. package/src/llama.cpp/ggml/cmake/common.cmake +26 -0
  51. package/src/llama.cpp/ggml/include/ggml.h +24 -0
  52. package/src/llama.cpp/ggml/src/CMakeLists.txt +10 -28
  53. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +6 -2
  54. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -5
  55. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -7
  56. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +1493 -12
  57. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +150 -1
  58. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +284 -29
  59. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +2 -1
  60. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -1
  61. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +7 -0
  62. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -4
  63. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +95 -22
  64. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +35 -12
  65. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -1
  66. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +93 -27
  67. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  68. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +12 -13
  69. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +40 -40
  70. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +12 -43
  71. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -2
  72. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +109 -40
  73. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -1
  74. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +19 -20
  75. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +114 -6
  76. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +6 -0
  77. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -1
  78. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  79. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  80. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +398 -158
  81. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -4
  82. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +7 -2
  83. package/src/llama.cpp/ggml/src/ggml.c +85 -2
  84. package/src/llama.cpp/include/llama.h +86 -22
  85. package/src/llama.cpp/src/CMakeLists.txt +5 -2
  86. package/src/llama.cpp/src/llama-adapter.cpp +19 -20
  87. package/src/llama.cpp/src/llama-adapter.h +11 -9
  88. package/src/llama.cpp/src/llama-arch.cpp +103 -16
  89. package/src/llama.cpp/src/llama-arch.h +18 -0
  90. package/src/llama.cpp/src/llama-batch.h +2 -2
  91. package/src/llama.cpp/src/llama-context.cpp +2253 -1222
  92. package/src/llama.cpp/src/llama-context.h +214 -77
  93. package/src/llama.cpp/src/llama-cparams.h +1 -0
  94. package/src/llama.cpp/src/llama-graph.cpp +1662 -0
  95. package/src/llama.cpp/src/llama-graph.h +574 -0
  96. package/src/llama.cpp/src/llama-hparams.cpp +8 -0
  97. package/src/llama.cpp/src/llama-hparams.h +9 -0
  98. package/src/llama.cpp/src/llama-io.cpp +15 -0
  99. package/src/llama.cpp/src/llama-io.h +35 -0
  100. package/src/llama.cpp/src/llama-kv-cache.cpp +1006 -291
  101. package/src/llama.cpp/src/llama-kv-cache.h +178 -110
  102. package/src/llama.cpp/src/llama-memory.cpp +1 -0
  103. package/src/llama.cpp/src/llama-memory.h +21 -0
  104. package/src/llama.cpp/src/llama-model.cpp +8244 -173
  105. package/src/llama.cpp/src/llama-model.h +34 -1
  106. package/src/llama.cpp/src/llama-quant.cpp +10 -1
  107. package/src/llama.cpp/src/llama.cpp +51 -9984
  108. package/src/llama.cpp/tests/test-backend-ops.cpp +145 -23
  109. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +0 -143
  110. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +0 -9
package/src/llama.cpp/src/llama-arch.cpp
@@ -59,6 +59,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
  { LLM_ARCH_EXAONE, "exaone" },
  { LLM_ARCH_RWKV6, "rwkv6" },
  { LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
+ { LLM_ARCH_RWKV7, "rwkv7" },
+ { LLM_ARCH_ARWKV7, "arwkv7" },
  { LLM_ARCH_GRANITE, "granite" },
  { LLM_ARCH_GRANITE_MOE, "granitemoe" },
  { LLM_ARCH_CHAMELEON, "chameleon" },
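
The two added rows register the new RWKV7 and ARWKV7 architecture identifiers in the table that maps enum values to the strings stored in model metadata. As a rough illustration only (a standalone sketch, not llama.cpp internals; the enum, table, and helper below are invented for the example), a reverse lookup over such a table is what lets a loader turn an architecture string back into an enum value:

    // Standalone sketch, not llama.cpp code: a cut-down table in the same
    // shape as LLM_ARCH_NAMES, plus a reverse lookup.
    #include <cstdio>
    #include <map>
    #include <string>

    enum arch_sketch { ARCH_RWKV6, ARCH_RWKV7, ARCH_ARWKV7, ARCH_UNKNOWN };

    static const std::map<arch_sketch, const char *> ARCH_NAMES = {
        { ARCH_RWKV6,  "rwkv6"  },
        { ARCH_RWKV7,  "rwkv7"  },   // added in this release
        { ARCH_ARWKV7, "arwkv7" },   // added in this release
    };

    // Map an architecture string from a model header back to the enum value.
    static arch_sketch arch_from_string(const std::string & name) {
        for (const auto & kv : ARCH_NAMES) {
            if (name == kv.second) {
                return kv.first;
            }
        }
        return ARCH_UNKNOWN;
    }

    int main() {
        std::printf("%d\n", (int) arch_from_string("arwkv7")); // prints 2
    }
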
@@ -110,22 +112,26 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
  { LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },
  { LLM_KV_TOKEN_SHIFT_COUNT, "%s.token_shift_count" },

- { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
- { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
- { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
- { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
- { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
- { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
- { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
- { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
- { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
- { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
- { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
- { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
- { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
- { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
- { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
- { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },
+ { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
+ { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
+ { LLM_KV_ATTENTION_MAX_ALIBI_BIAS, "%s.attention.max_alibi_bias" },
+ { LLM_KV_ATTENTION_CLAMP_KQV, "%s.attention.clamp_kqv" },
+ { LLM_KV_ATTENTION_KEY_LENGTH, "%s.attention.key_length" },
+ { LLM_KV_ATTENTION_VALUE_LENGTH, "%s.attention.value_length" },
+ { LLM_KV_ATTENTION_LAYERNORM_EPS, "%s.attention.layer_norm_epsilon" },
+ { LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, "%s.attention.layer_norm_rms_epsilon" },
+ { LLM_KV_ATTENTION_GROUPNORM_EPS, "%s.attention.group_norm_epsilon" },
+ { LLM_KV_ATTENTION_GROUPNORM_GROUPS, "%s.attention.group_norm_groups" },
+ { LLM_KV_ATTENTION_CAUSAL, "%s.attention.causal" },
+ { LLM_KV_ATTENTION_Q_LORA_RANK, "%s.attention.q_lora_rank" },
+ { LLM_KV_ATTENTION_KV_LORA_RANK, "%s.attention.kv_lora_rank" },
+ { LLM_KV_ATTENTION_DECAY_LORA_RANK, "%s.attention.decay_lora_rank" },
+ { LLM_KV_ATTENTION_ICLR_LORA_RANK, "%s.attention.iclr_lora_rank" },
+ { LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK, "%s.attention.value_residual_mix_lora_rank" },
+ { LLM_KV_ATTENTION_GATE_LORA_RANK, "%s.attention.gate_lora_rank" },
+ { LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT, "%s.attention.relative_buckets_count" },
+ { LLM_KV_ATTENTION_SLIDING_WINDOW, "%s.attention.sliding_window" },
+ { LLM_KV_ATTENTION_SCALE, "%s.attention.scale" },

  { LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
  { LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
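
The four added keys follow the existing "%s."-templated convention: the "%s" placeholder is replaced by the architecture name, so the new LoRA-rank settings end up under keys such as rwkv7.attention.decay_lora_rank. A minimal sketch of that expansion (the format_kv_key helper is hypothetical, shown only to make the substitution concrete):

    // Hypothetical helper illustrating the "%s" -> architecture-name expansion.
    #include <cstdio>
    #include <string>

    static std::string format_kv_key(const char * tmpl, const char * arch) {
        char buf[256];
        std::snprintf(buf, sizeof(buf), tmpl, arch); // substitute the arch name
        return std::string(buf);
    }

    int main() {
        // One of the keys added above:
        std::printf("%s\n", format_kv_key("%s.attention.decay_lora_rank", "rwkv7").c_str());
        // -> rwkv7.attention.decay_lora_rank
    }
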
@@ -772,6 +778,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  {
  { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
  { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
  { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
  { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
  { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
@@ -1238,6 +1245,74 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
  { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
  },
  },
+ {
+ LLM_ARCH_RWKV7,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
+ { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
+ { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
+ { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
+ { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
+ { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
+ { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
+ { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
+ { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
+ { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
+ { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
+ { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
+ { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
+ { LLM_TENSOR_CHANNEL_MIX_LERP_K, "blk.%d.channel_mix_lerp_k" },
+ { LLM_TENSOR_CHANNEL_MIX_KEY, "blk.%d.channel_mix_key" },
+ { LLM_TENSOR_CHANNEL_MIX_VALUE, "blk.%d.channel_mix_value" },
+ },
+ },
+ {
+ LLM_ARCH_ARWKV7,
+ {
+ { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+ { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
+ { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+ { LLM_TENSOR_OUTPUT, "output" },
+ { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+ { LLM_TENSOR_TIME_MIX_W0, "blk.%d.time_mix_w0" },
+ { LLM_TENSOR_TIME_MIX_W1, "blk.%d.time_mix_w1" },
+ { LLM_TENSOR_TIME_MIX_W2, "blk.%d.time_mix_w2" },
+ { LLM_TENSOR_TIME_MIX_A0, "blk.%d.time_mix_a0" },
+ { LLM_TENSOR_TIME_MIX_A1, "blk.%d.time_mix_a1" },
+ { LLM_TENSOR_TIME_MIX_A2, "blk.%d.time_mix_a2" },
+ { LLM_TENSOR_TIME_MIX_V0, "blk.%d.time_mix_v0" },
+ { LLM_TENSOR_TIME_MIX_V1, "blk.%d.time_mix_v1" },
+ { LLM_TENSOR_TIME_MIX_V2, "blk.%d.time_mix_v2" },
+ { LLM_TENSOR_TIME_MIX_G1, "blk.%d.time_mix_g1" },
+ { LLM_TENSOR_TIME_MIX_G2, "blk.%d.time_mix_g2" },
+ { LLM_TENSOR_TIME_MIX_K_K, "blk.%d.time_mix_k_k" },
+ { LLM_TENSOR_TIME_MIX_K_A, "blk.%d.time_mix_k_a" },
+ { LLM_TENSOR_TIME_MIX_R_K, "blk.%d.time_mix_r_k" },
+ { LLM_TENSOR_TIME_MIX_LERP_FUSED, "blk.%d.time_mix_lerp_fused" },
+ { LLM_TENSOR_TIME_MIX_KEY, "blk.%d.time_mix_key" },
+ { LLM_TENSOR_TIME_MIX_VALUE, "blk.%d.time_mix_value" },
+ { LLM_TENSOR_TIME_MIX_RECEPTANCE, "blk.%d.time_mix_receptance" },
+ { LLM_TENSOR_TIME_MIX_LN, "blk.%d.time_mix_ln" },
+ { LLM_TENSOR_TIME_MIX_OUTPUT, "blk.%d.time_mix_output" },
+ { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+ { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+ { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+ { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+ },
+ },
  {
  LLM_ARCH_GRANITE,
  {
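
Both new per-architecture tables reuse the "blk.%d.*" convention, so each per-layer tensor name is produced by substituting the block index into the template. A small standalone sketch of that substitution (not the llama.cpp loader itself):

    // Standalone sketch of per-layer tensor naming from a "blk.%d.*" template.
    #include <cstdio>

    int main() {
        const char * tmpl = "blk.%d.time_mix_w0"; // one of the new RWKV7 entries
        for (int il = 0; il < 2; ++il) {
            char name[64];
            std::snprintf(name, sizeof(name), tmpl, il);
            std::printf("%s\n", name); // blk.0.time_mix_w0, blk.1.time_mix_w0
        }
    }
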
@@ -1397,6 +1472,12 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_SSM_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_TIME_MIX_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_TIME_MIX_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_A1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_A2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_V1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_V2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_G1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+ {LLM_TENSOR_TIME_MIX_G2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_TIME_MIX_DECAY_W1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_TIME_MIX_DECAY_W2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
  {LLM_TENSOR_TIME_MIX_KEY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
@@ -1415,6 +1496,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_CHANNEL_MIX_LERP_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_K_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_K_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+ {LLM_TENSOR_TIME_MIX_R_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_TIME_MIX_LERP_W, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
  {LLM_TENSOR_TIME_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
  {LLM_TENSOR_TIME_MIX_LERP_V, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
@@ -1422,6 +1506,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
  {LLM_TENSOR_TIME_MIX_LERP_G, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
  {LLM_TENSOR_TIME_MIX_LERP_FUSED, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
  {LLM_TENSOR_TIME_MIX_DECAY, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_W0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_A0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+ {LLM_TENSOR_TIME_MIX_V0, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
  {LLM_TENSOR_TIME_MIX_FIRST, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_RWKV_WKV6}},
  {LLM_TENSOR_ATTN_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
  {LLM_TENSOR_ATTN_NORM_2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
package/src/llama.cpp/src/llama-arch.h
@@ -63,6 +63,8 @@ enum llm_arch {
  LLM_ARCH_EXAONE,
  LLM_ARCH_RWKV6,
  LLM_ARCH_RWKV6QWEN2,
+ LLM_ARCH_RWKV7,
+ LLM_ARCH_ARWKV7,
  LLM_ARCH_GRANITE,
  LLM_ARCH_GRANITE_MOE,
  LLM_ARCH_CHAMELEON,
@@ -127,6 +129,10 @@ enum llm_kv {
  LLM_KV_ATTENTION_CAUSAL,
  LLM_KV_ATTENTION_Q_LORA_RANK,
  LLM_KV_ATTENTION_KV_LORA_RANK,
+ LLM_KV_ATTENTION_DECAY_LORA_RANK,
+ LLM_KV_ATTENTION_ICLR_LORA_RANK,
+ LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
+ LLM_KV_ATTENTION_GATE_LORA_RANK,
  LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
  LLM_KV_ATTENTION_SLIDING_WINDOW,
  LLM_KV_ATTENTION_SCALE,
@@ -250,8 +256,20 @@ enum llm_tensor {
  LLM_TENSOR_SSM_A,
  LLM_TENSOR_SSM_D,
  LLM_TENSOR_SSM_OUT,
+ LLM_TENSOR_TIME_MIX_W0,
  LLM_TENSOR_TIME_MIX_W1,
  LLM_TENSOR_TIME_MIX_W2,
+ LLM_TENSOR_TIME_MIX_A0,
+ LLM_TENSOR_TIME_MIX_A1,
+ LLM_TENSOR_TIME_MIX_A2,
+ LLM_TENSOR_TIME_MIX_V0,
+ LLM_TENSOR_TIME_MIX_V1,
+ LLM_TENSOR_TIME_MIX_V2,
+ LLM_TENSOR_TIME_MIX_G1,
+ LLM_TENSOR_TIME_MIX_G2,
+ LLM_TENSOR_TIME_MIX_K_K,
+ LLM_TENSOR_TIME_MIX_K_A,
+ LLM_TENSOR_TIME_MIX_R_K,
  LLM_TENSOR_TIME_MIX_LERP_X,
  LLM_TENSOR_TIME_MIX_LERP_W,
  LLM_TENSOR_TIME_MIX_LERP_K,
package/src/llama.cpp/src/llama-batch.h
@@ -42,9 +42,9 @@ struct llama_sbatch {
  bool logits_all; // TODO: remove once lctx.logits_all is removed too

  // sorted indices into the batch
- std::vector<size_t> ids;
+ std::vector<int64_t> ids;
  // batch indices of the output
- std::vector<size_t> out_ids;
+ std::vector<int64_t> out_ids;
  std::vector<llama_sbatch_seq> seq;

  const llama_batch * batch = nullptr;
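
The sbatch index vectors change from size_t to int64_t. The diff itself does not state the motivation, but the practical difference between the two types is easy to demonstrate: unsigned indices wrap around on subtraction or when a -1 sentinel is stored, while signed 64-bit indices keep the sign. A standalone sketch:

    // Standalone sketch (not llama.cpp code): unsigned vs signed batch indices.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<size_t>  ids_unsigned = { 0 };
        std::vector<int64_t> ids_signed   = { 0 };

        size_t  du = ids_unsigned[0] - 1; // wraps to SIZE_MAX
        int64_t ds = ids_signed[0]   - 1; // stays -1, sign is preserved

        std::printf("unsigned: %zu\nsigned: %lld\n", du, (long long) ds);
    }
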