cui-llama.rn 1.4.3 → 1.4.6

This diff shows the content of publicly released versions of the package as published to the public registry. It is provided for informational purposes only and reflects the changes between those package versions.
Files changed (134)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  4. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  5. package/android/src/main/jni-utils.h +6 -0
  6. package/android/src/main/jni.cpp +289 -31
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  15. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  16. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  17. package/cpp/chat-template.hpp +529 -0
  18. package/cpp/chat.cpp +1779 -0
  19. package/cpp/chat.h +135 -0
  20. package/cpp/common.cpp +2064 -1873
  21. package/cpp/common.h +700 -699
  22. package/cpp/ggml-alloc.c +1039 -1042
  23. package/cpp/ggml-alloc.h +1 -1
  24. package/cpp/ggml-backend-impl.h +255 -255
  25. package/cpp/ggml-backend-reg.cpp +586 -582
  26. package/cpp/ggml-backend.cpp +2004 -2002
  27. package/cpp/ggml-backend.h +354 -354
  28. package/cpp/ggml-common.h +1851 -1853
  29. package/cpp/ggml-cpp.h +39 -39
  30. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  31. package/cpp/ggml-cpu-aarch64.h +8 -8
  32. package/cpp/ggml-cpu-impl.h +531 -386
  33. package/cpp/ggml-cpu-quants.c +12527 -10920
  34. package/cpp/ggml-cpu-traits.cpp +36 -36
  35. package/cpp/ggml-cpu-traits.h +38 -38
  36. package/cpp/ggml-cpu.c +15766 -14391
  37. package/cpp/ggml-cpu.cpp +655 -635
  38. package/cpp/ggml-cpu.h +138 -135
  39. package/cpp/ggml-impl.h +567 -567
  40. package/cpp/ggml-metal-impl.h +235 -0
  41. package/cpp/ggml-metal.h +1 -1
  42. package/cpp/ggml-metal.m +5146 -4884
  43. package/cpp/ggml-opt.cpp +854 -854
  44. package/cpp/ggml-opt.h +216 -216
  45. package/cpp/ggml-quants.c +5238 -5238
  46. package/cpp/ggml-threading.h +14 -14
  47. package/cpp/ggml.c +6529 -6514
  48. package/cpp/ggml.h +2198 -2194
  49. package/cpp/gguf.cpp +1329 -1329
  50. package/cpp/gguf.h +202 -202
  51. package/cpp/json-schema-to-grammar.cpp +1024 -1045
  52. package/cpp/json-schema-to-grammar.h +21 -8
  53. package/cpp/json.hpp +24766 -24766
  54. package/cpp/llama-adapter.cpp +347 -347
  55. package/cpp/llama-adapter.h +74 -74
  56. package/cpp/llama-arch.cpp +1513 -1487
  57. package/cpp/llama-arch.h +403 -400
  58. package/cpp/llama-batch.cpp +368 -368
  59. package/cpp/llama-batch.h +88 -88
  60. package/cpp/llama-chat.cpp +588 -578
  61. package/cpp/llama-chat.h +53 -52
  62. package/cpp/llama-context.cpp +1775 -1775
  63. package/cpp/llama-context.h +128 -128
  64. package/cpp/llama-cparams.cpp +1 -1
  65. package/cpp/llama-cparams.h +37 -37
  66. package/cpp/llama-cpp.h +30 -30
  67. package/cpp/llama-grammar.cpp +1219 -1139
  68. package/cpp/llama-grammar.h +173 -143
  69. package/cpp/llama-hparams.cpp +71 -71
  70. package/cpp/llama-hparams.h +139 -139
  71. package/cpp/llama-impl.cpp +167 -167
  72. package/cpp/llama-impl.h +61 -61
  73. package/cpp/llama-kv-cache.cpp +718 -718
  74. package/cpp/llama-kv-cache.h +219 -218
  75. package/cpp/llama-mmap.cpp +600 -590
  76. package/cpp/llama-mmap.h +68 -67
  77. package/cpp/llama-model-loader.cpp +1124 -1124
  78. package/cpp/llama-model-loader.h +167 -167
  79. package/cpp/llama-model.cpp +4087 -3997
  80. package/cpp/llama-model.h +370 -370
  81. package/cpp/llama-sampling.cpp +2558 -2408
  82. package/cpp/llama-sampling.h +32 -32
  83. package/cpp/llama-vocab.cpp +3264 -3247
  84. package/cpp/llama-vocab.h +125 -125
  85. package/cpp/llama.cpp +10284 -10077
  86. package/cpp/llama.h +1354 -1323
  87. package/cpp/log.cpp +393 -401
  88. package/cpp/log.h +132 -121
  89. package/cpp/minja/chat-template.hpp +529 -0
  90. package/cpp/minja/minja.hpp +2915 -0
  91. package/cpp/minja.hpp +2915 -0
  92. package/cpp/rn-llama.cpp +66 -6
  93. package/cpp/rn-llama.h +26 -1
  94. package/cpp/sampling.cpp +570 -505
  95. package/cpp/sampling.h +3 -0
  96. package/cpp/sgemm.cpp +2598 -2597
  97. package/cpp/sgemm.h +14 -14
  98. package/cpp/speculative.cpp +278 -277
  99. package/cpp/speculative.h +28 -28
  100. package/cpp/unicode.cpp +9 -2
  101. package/ios/CMakeLists.txt +6 -0
  102. package/ios/RNLlama.h +0 -8
  103. package/ios/RNLlama.mm +27 -3
  104. package/ios/RNLlamaContext.h +10 -1
  105. package/ios/RNLlamaContext.mm +269 -57
  106. package/jest/mock.js +21 -2
  107. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  108. package/lib/commonjs/grammar.js +3 -0
  109. package/lib/commonjs/grammar.js.map +1 -1
  110. package/lib/commonjs/index.js +87 -13
  111. package/lib/commonjs/index.js.map +1 -1
  112. package/lib/module/NativeRNLlama.js.map +1 -1
  113. package/lib/module/grammar.js +3 -0
  114. package/lib/module/grammar.js.map +1 -1
  115. package/lib/module/index.js +86 -13
  116. package/lib/module/index.js.map +1 -1
  117. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  118. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  119. package/lib/typescript/grammar.d.ts.map +1 -1
  120. package/lib/typescript/index.d.ts +32 -7
  121. package/lib/typescript/index.d.ts.map +1 -1
  122. package/llama-rn.podspec +1 -1
  123. package/package.json +3 -2
  124. package/src/NativeRNLlama.ts +115 -3
  125. package/src/grammar.ts +3 -0
  126. package/src/index.ts +138 -21
  127. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  128. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  129. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  130. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  132. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -55
  134. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-arch.h CHANGED
@@ -1,400 +1,403 @@
 #pragma once
 
 #include "ggml.h" // lm_ggml_op
 
 #include <string>
 
 //
 // gguf constants (sync with gguf.py)
 //
 
 enum llm_arch {
     LLM_ARCH_LLAMA,
     LLM_ARCH_DECI,
     LLM_ARCH_FALCON,
     LLM_ARCH_BAICHUAN,
     LLM_ARCH_GROK,
     LLM_ARCH_GPT2,
     LLM_ARCH_GPTJ,
     LLM_ARCH_GPTNEOX,
     LLM_ARCH_MPT,
     LLM_ARCH_STARCODER,
     LLM_ARCH_REFACT,
     LLM_ARCH_BERT,
     LLM_ARCH_NOMIC_BERT,
     LLM_ARCH_JINA_BERT_V2,
     LLM_ARCH_BLOOM,
     LLM_ARCH_STABLELM,
     LLM_ARCH_QWEN,
     LLM_ARCH_QWEN2,
     LLM_ARCH_QWEN2MOE,
     LLM_ARCH_QWEN2VL,
     LLM_ARCH_PHI2,
     LLM_ARCH_PHI3,
     LLM_ARCH_PHIMOE,
     LLM_ARCH_PLAMO,
     LLM_ARCH_CODESHELL,
     LLM_ARCH_ORION,
     LLM_ARCH_INTERNLM2,
     LLM_ARCH_MINICPM,
     LLM_ARCH_MINICPM3,
     LLM_ARCH_GEMMA,
     LLM_ARCH_GEMMA2,
+    LLM_ARCH_GEMMA3,
     LLM_ARCH_STARCODER2,
     LLM_ARCH_MAMBA,
     LLM_ARCH_XVERSE,
     LLM_ARCH_COMMAND_R,
     LLM_ARCH_COHERE2,
     LLM_ARCH_DBRX,
     LLM_ARCH_OLMO,
     LLM_ARCH_OLMO2,
     LLM_ARCH_OLMOE,
     LLM_ARCH_OPENELM,
     LLM_ARCH_ARCTIC,
     LLM_ARCH_DEEPSEEK,
     LLM_ARCH_DEEPSEEK2,
     LLM_ARCH_CHATGLM,
     LLM_ARCH_BITNET,
     LLM_ARCH_T5,
     LLM_ARCH_T5ENCODER,
     LLM_ARCH_JAIS,
     LLM_ARCH_NEMOTRON,
     LLM_ARCH_EXAONE,
     LLM_ARCH_RWKV6,
     LLM_ARCH_RWKV6QWEN2,
     LLM_ARCH_GRANITE,
     LLM_ARCH_GRANITE_MOE,
     LLM_ARCH_CHAMELEON,
     LLM_ARCH_WAVTOKENIZER_DEC,
     LLM_ARCH_UNKNOWN,
 };
 
 enum llm_kv {
     LLM_KV_GENERAL_TYPE,
     LLM_KV_GENERAL_ARCHITECTURE,
     LLM_KV_GENERAL_QUANTIZATION_VERSION,
     LLM_KV_GENERAL_ALIGNMENT,
     LLM_KV_GENERAL_NAME,
     LLM_KV_GENERAL_AUTHOR,
     LLM_KV_GENERAL_VERSION,
     LLM_KV_GENERAL_URL,
     LLM_KV_GENERAL_DESCRIPTION,
     LLM_KV_GENERAL_LICENSE,
     LLM_KV_GENERAL_SOURCE_URL,
     LLM_KV_GENERAL_SOURCE_HF_REPO,
 
     LLM_KV_VOCAB_SIZE,
     LLM_KV_CONTEXT_LENGTH,
     LLM_KV_EMBEDDING_LENGTH,
     LLM_KV_FEATURES_LENGTH,
     LLM_KV_BLOCK_COUNT,
     LLM_KV_LEADING_DENSE_BLOCK_COUNT,
     LLM_KV_FEED_FORWARD_LENGTH,
     LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
     LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
     LLM_KV_USE_PARALLEL_RESIDUAL,
     LLM_KV_TENSOR_DATA_LAYOUT,
     LLM_KV_EXPERT_COUNT,
     LLM_KV_EXPERT_USED_COUNT,
     LLM_KV_EXPERT_SHARED_COUNT,
     LLM_KV_EXPERT_WEIGHTS_SCALE,
     LLM_KV_EXPERT_WEIGHTS_NORM,
     LLM_KV_EXPERT_GATING_FUNC,
     LLM_KV_POOLING_TYPE,
     LLM_KV_LOGIT_SCALE,
     LLM_KV_DECODER_START_TOKEN_ID,
     LLM_KV_ATTN_LOGIT_SOFTCAPPING,
     LLM_KV_FINAL_LOGIT_SOFTCAPPING,
     LLM_KV_SWIN_NORM,
     LLM_KV_RESCALE_EVERY_N_LAYERS,
     LLM_KV_TIME_MIX_EXTRA_DIM,
     LLM_KV_TIME_DECAY_EXTRA_DIM,
     LLM_KV_RESIDUAL_SCALE,
     LLM_KV_EMBEDDING_SCALE,
     LLM_KV_TOKEN_SHIFT_COUNT,
 
     LLM_KV_ATTENTION_HEAD_COUNT,
     LLM_KV_ATTENTION_HEAD_COUNT_KV,
     LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
     LLM_KV_ATTENTION_CLAMP_KQV,
     LLM_KV_ATTENTION_KEY_LENGTH,
     LLM_KV_ATTENTION_VALUE_LENGTH,
     LLM_KV_ATTENTION_LAYERNORM_EPS,
     LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
     LLM_KV_ATTENTION_GROUPNORM_EPS,
     LLM_KV_ATTENTION_GROUPNORM_GROUPS,
     LLM_KV_ATTENTION_CAUSAL,
     LLM_KV_ATTENTION_Q_LORA_RANK,
     LLM_KV_ATTENTION_KV_LORA_RANK,
     LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
     LLM_KV_ATTENTION_SLIDING_WINDOW,
     LLM_KV_ATTENTION_SCALE,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
     LLM_KV_ROPE_DIMENSION_SECTIONS,
     LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
     LLM_KV_ROPE_SCALING_TYPE,
     LLM_KV_ROPE_SCALING_FACTOR,
     LLM_KV_ROPE_SCALING_ATTN_FACTOR,
     LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
     LLM_KV_ROPE_SCALING_FINETUNED,
     LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
 
     LLM_KV_SPLIT_NO,
     LLM_KV_SPLIT_COUNT,
     LLM_KV_SPLIT_TENSORS_COUNT,
 
     LLM_KV_SSM_INNER_SIZE,
     LLM_KV_SSM_CONV_KERNEL,
     LLM_KV_SSM_STATE_SIZE,
     LLM_KV_SSM_TIME_STEP_RANK,
     LLM_KV_SSM_DT_B_C_RMS,
 
     LLM_KV_WKV_HEAD_SIZE,
 
     LLM_KV_TOKENIZER_MODEL,
     LLM_KV_TOKENIZER_PRE,
     LLM_KV_TOKENIZER_LIST,
     LLM_KV_TOKENIZER_TOKEN_TYPE,
     LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
     LLM_KV_TOKENIZER_SCORES,
     LLM_KV_TOKENIZER_MERGES,
     LLM_KV_TOKENIZER_BOS_ID,
     LLM_KV_TOKENIZER_EOS_ID,
     LLM_KV_TOKENIZER_EOT_ID,
     LLM_KV_TOKENIZER_EOM_ID,
     LLM_KV_TOKENIZER_UNK_ID,
     LLM_KV_TOKENIZER_SEP_ID,
     LLM_KV_TOKENIZER_PAD_ID,
     LLM_KV_TOKENIZER_CLS_ID,
     LLM_KV_TOKENIZER_MASK_ID,
     LLM_KV_TOKENIZER_ADD_BOS,
     LLM_KV_TOKENIZER_ADD_EOS,
     LLM_KV_TOKENIZER_ADD_PREFIX,
     LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
     LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
     LLM_KV_TOKENIZER_HF_JSON,
     LLM_KV_TOKENIZER_RWKV,
     LLM_KV_TOKENIZER_CHAT_TEMPLATE,
+    LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
     LLM_KV_TOKENIZER_FIM_PRE_ID,
     LLM_KV_TOKENIZER_FIM_SUF_ID,
     LLM_KV_TOKENIZER_FIM_MID_ID,
     LLM_KV_TOKENIZER_FIM_PAD_ID,
     LLM_KV_TOKENIZER_FIM_REP_ID,
     LLM_KV_TOKENIZER_FIM_SEP_ID,
 
     LLM_KV_ADAPTER_TYPE,
     LLM_KV_ADAPTER_LORA_ALPHA,
 
     LLM_KV_POSNET_EMBEDDING_LENGTH,
     LLM_KV_POSNET_BLOCK_COUNT,
 
     LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
     LLM_KV_CONVNEXT_BLOCK_COUNT,
 
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,
     LLM_KV_TOKENIZER_MIDDLE_ID,
 };
 
 enum llm_tensor {
     LLM_TENSOR_TOKEN_EMBD,
     LLM_TENSOR_TOKEN_EMBD_NORM,
     LLM_TENSOR_TOKEN_TYPES,
     LLM_TENSOR_POS_EMBD,
     LLM_TENSOR_OUTPUT,
     LLM_TENSOR_OUTPUT_NORM,
     LLM_TENSOR_ROPE_FREQS,
     LLM_TENSOR_ROPE_FACTORS_LONG,
     LLM_TENSOR_ROPE_FACTORS_SHORT,
     LLM_TENSOR_ATTN_Q,
     LLM_TENSOR_ATTN_K,
     LLM_TENSOR_ATTN_V,
     LLM_TENSOR_ATTN_QKV,
     LLM_TENSOR_ATTN_OUT,
     LLM_TENSOR_ATTN_NORM,
     LLM_TENSOR_ATTN_NORM_2,
     LLM_TENSOR_ATTN_OUT_NORM,
     LLM_TENSOR_ATTN_POST_NORM,
     LLM_TENSOR_ATTN_ROT_EMBD,
     LLM_TENSOR_FFN_GATE_INP,
     LLM_TENSOR_FFN_GATE_INP_SHEXP,
     LLM_TENSOR_FFN_NORM,
     LLM_TENSOR_FFN_POST_NORM,
     LLM_TENSOR_FFN_GATE,
     LLM_TENSOR_FFN_DOWN,
     LLM_TENSOR_FFN_UP,
     LLM_TENSOR_FFN_ACT,
     LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
     LLM_TENSOR_FFN_GATE_EXP,
     LLM_TENSOR_FFN_UP_EXP,
     LLM_TENSOR_FFN_NORM_EXPS,
     LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
     LLM_TENSOR_FFN_GATE_EXPS,
     LLM_TENSOR_FFN_UP_EXPS,
     LLM_TENSOR_FFN_DOWN_SHEXP,
     LLM_TENSOR_FFN_GATE_SHEXP,
     LLM_TENSOR_FFN_UP_SHEXP,
     LLM_TENSOR_FFN_EXP_PROBS_B,
     LLM_TENSOR_ATTN_Q_NORM,
     LLM_TENSOR_ATTN_K_NORM,
     LLM_TENSOR_LAYER_OUT_NORM,
     LLM_TENSOR_SSM_IN,
     LLM_TENSOR_SSM_CONV1D,
     LLM_TENSOR_SSM_X,
     LLM_TENSOR_SSM_DT,
     LLM_TENSOR_SSM_A,
     LLM_TENSOR_SSM_D,
     LLM_TENSOR_SSM_OUT,
     LLM_TENSOR_TIME_MIX_W1,
     LLM_TENSOR_TIME_MIX_W2,
     LLM_TENSOR_TIME_MIX_LERP_X,
     LLM_TENSOR_TIME_MIX_LERP_W,
     LLM_TENSOR_TIME_MIX_LERP_K,
     LLM_TENSOR_TIME_MIX_LERP_V,
     LLM_TENSOR_TIME_MIX_LERP_R,
     LLM_TENSOR_TIME_MIX_LERP_G,
     LLM_TENSOR_TIME_MIX_LERP_FUSED,
     LLM_TENSOR_TIME_MIX_FIRST,
     LLM_TENSOR_TIME_MIX_DECAY,
     LLM_TENSOR_TIME_MIX_DECAY_W1,
     LLM_TENSOR_TIME_MIX_DECAY_W2,
     LLM_TENSOR_TIME_MIX_KEY,
     LLM_TENSOR_TIME_MIX_VALUE,
     LLM_TENSOR_TIME_MIX_RECEPTANCE,
     LLM_TENSOR_TIME_MIX_GATE,
     LLM_TENSOR_TIME_MIX_LN,
     LLM_TENSOR_TIME_MIX_OUTPUT,
     LLM_TENSOR_CHANNEL_MIX_LERP_K,
     LLM_TENSOR_CHANNEL_MIX_LERP_R,
     LLM_TENSOR_CHANNEL_MIX_KEY,
     LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
     LLM_TENSOR_CHANNEL_MIX_VALUE,
     LLM_TENSOR_ATTN_Q_A,
     LLM_TENSOR_ATTN_Q_B,
     LLM_TENSOR_ATTN_KV_A_MQA,
     LLM_TENSOR_ATTN_KV_B,
     LLM_TENSOR_ATTN_Q_A_NORM,
     LLM_TENSOR_ATTN_KV_A_NORM,
     LLM_TENSOR_ATTN_SUB_NORM,
     LLM_TENSOR_FFN_SUB_NORM,
     LLM_TENSOR_DEC_ATTN_NORM,
     LLM_TENSOR_DEC_ATTN_Q,
     LLM_TENSOR_DEC_ATTN_K,
     LLM_TENSOR_DEC_ATTN_V,
     LLM_TENSOR_DEC_ATTN_OUT,
     LLM_TENSOR_DEC_ATTN_REL_B,
     LLM_TENSOR_DEC_CROSS_ATTN_NORM,
     LLM_TENSOR_DEC_CROSS_ATTN_Q,
     LLM_TENSOR_DEC_CROSS_ATTN_K,
     LLM_TENSOR_DEC_CROSS_ATTN_V,
     LLM_TENSOR_DEC_CROSS_ATTN_OUT,
     LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
     LLM_TENSOR_DEC_FFN_NORM,
     LLM_TENSOR_DEC_FFN_GATE,
     LLM_TENSOR_DEC_FFN_DOWN,
     LLM_TENSOR_DEC_FFN_UP,
     LLM_TENSOR_DEC_OUTPUT_NORM,
     LLM_TENSOR_ENC_ATTN_NORM,
     LLM_TENSOR_ENC_ATTN_Q,
     LLM_TENSOR_ENC_ATTN_K,
     LLM_TENSOR_ENC_ATTN_V,
     LLM_TENSOR_ENC_ATTN_OUT,
     LLM_TENSOR_ENC_ATTN_REL_B,
     LLM_TENSOR_ENC_FFN_NORM,
     LLM_TENSOR_ENC_FFN_GATE,
     LLM_TENSOR_ENC_FFN_DOWN,
     LLM_TENSOR_ENC_FFN_UP,
     LLM_TENSOR_ENC_OUTPUT_NORM,
     LLM_TENSOR_CLS,
     LLM_TENSOR_CLS_OUT,
     LLM_TENSOR_CONV1D,
     LLM_TENSOR_CONVNEXT_DW,
     LLM_TENSOR_CONVNEXT_NORM,
     LLM_TENSOR_CONVNEXT_PW1,
     LLM_TENSOR_CONVNEXT_PW2,
     LLM_TENSOR_CONVNEXT_GAMMA,
     LLM_TENSOR_POS_NET_CONV1,
     LLM_TENSOR_POS_NET_CONV2,
     LLM_TENSOR_POS_NET_NORM,
     LLM_TENSOR_POS_NET_NORM1,
     LLM_TENSOR_POS_NET_NORM2,
     LLM_TENSOR_POS_NET_ATTN_NORM,
     LLM_TENSOR_POS_NET_ATTN_Q,
     LLM_TENSOR_POS_NET_ATTN_K,
     LLM_TENSOR_POS_NET_ATTN_V,
     LLM_TENSOR_POS_NET_ATTN_OUT,
 };
 
 enum llm_tensor_layer {
     LLM_TENSOR_LAYER_INPUT,
     LLM_TENSOR_LAYER_REPEATING,
     LLM_TENSOR_LAYER_OUTPUT,
 };
 
 struct LLM_KV {
-    LLM_KV(llm_arch arch);
+    LLM_KV(llm_arch arch, const char * suffix = nullptr);
 
     llm_arch arch;
+    const char * suffix;
 
     std::string operator()(llm_kv kv) const;
 };
 
 // helper to handle gguf constants
 // usage:
 //
 //   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
 //
 //   std::string name = tn(LLM_TENSOR_OUTPUT);                 -> "output"
 //   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");     -> "token_embd.bias"
 //   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
 //
 struct LLM_TN_IMPL {
     const llm_arch arch;
     const llm_tensor tensor;
     const char * const suffix;
     const int bid;
     const int xid;
 
     std::string str() const;
 
     operator std::string() const {
         return str();
     }
 
     friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
         return str == tn.str();
     }
 
     friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
         return str != tn.str();
     }
 };
 
 struct LLM_TN {
     LLM_TN(llm_arch arch) : arch(arch) {}
 
     llm_arch arch;
 
     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
         return { arch, tensor, suffix, bid, xid };
     }
 
     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
         return { arch, tensor, nullptr, bid, xid };
     }
 };
 
 
 struct llm_tensor_info {
     llm_tensor_layer layer;
     lm_ggml_op op;
 };
 
 const char * llm_arch_name(llm_arch arch);
 
 llm_arch llm_arch_from_string(const std::string & name);
 
 const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
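The net change in this header is small: a new LLM_ARCH_GEMMA3 enum value, a new LLM_KV_TOKENIZER_CHAT_TEMPLATE_N key, and an optional suffix argument on LLM_KV. The sketch below is a hypothetical illustration, not code from the package: it assumes LLM_KV::operator() appends the suffix to the base key (the actual format strings live in llama-arch.cpp), so a named chat template such as "tokenizer.chat_template.tool_use" can be looked up alongside the default "tokenizer.chat_template".

    #include <string>
    #include "llama-arch.h"

    // Hypothetical helper (illustration only): build the GGUF metadata key for a
    // chat template, optionally selecting a named variant via the new `suffix`
    // parameter of LLM_KV. Assumes LLM_KV_TOKENIZER_CHAT_TEMPLATE_N expands to
    // "tokenizer.chat_template.<suffix>".
    static std::string chat_template_key(llm_arch arch, const char * name = nullptr) {
        const LLM_KV kv(arch, name); // suffix stays nullptr for the default template
        return name == nullptr
            ? kv(LLM_KV_TOKENIZER_CHAT_TEMPLATE)    // "tokenizer.chat_template"
            : kv(LLM_KV_TOKENIZER_CHAT_TEMPLATE_N); // e.g. "tokenizer.chat_template.tool_use"
    }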
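For readers new to this header, the LLM_TN helper (unchanged in this release) is how GGUF tensor names are composed; the snippet below only restates the usage comment from the header itself.

    #include <string>
    #include "llama-arch.h"

    // Mirrors the usage comment in llama-arch.h: compose tensor names for an arch.
    static void tensor_name_examples() {
        const auto tn = LLM_TN(LLM_ARCH_LLAMA);

        std::string output = tn(LLM_TENSOR_OUTPUT);                 // "output"
        std::string bias   = tn(LLM_TENSOR_TOKEN_EMBD, "bias");     // "token_embd.bias"
        std::string norm   = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); // "blk.3.attn_norm.weight"
        (void)output; (void)bias; (void)norm; // silence unused-variable warnings
    }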