cui-llama.rn 1.4.4 → 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. package/android/src/main/CMakeLists.txt +2 -2
  2. package/android/src/main/jni.cpp +12 -10
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/chat-template.hpp +529 -529
  12. package/cpp/chat.cpp +959 -265
  13. package/cpp/chat.h +135 -0
  14. package/cpp/common.cpp +2064 -1996
  15. package/cpp/common.h +700 -744
  16. package/cpp/ggml-alloc.c +1039 -1030
  17. package/cpp/ggml-alloc.h +1 -1
  18. package/cpp/ggml-backend-impl.h +255 -255
  19. package/cpp/ggml-backend-reg.cpp +586 -582
  20. package/cpp/ggml-backend.cpp +2004 -2002
  21. package/cpp/ggml-backend.h +354 -354
  22. package/cpp/ggml-common.h +1851 -1851
  23. package/cpp/ggml-cpp.h +39 -39
  24. package/cpp/ggml-cpu-aarch64.cpp +4248 -4247
  25. package/cpp/ggml-cpu-aarch64.h +8 -8
  26. package/cpp/ggml-cpu-impl.h +531 -380
  27. package/cpp/ggml-cpu-quants.c +12527 -11517
  28. package/cpp/ggml-cpu-traits.cpp +36 -36
  29. package/cpp/ggml-cpu-traits.h +38 -38
  30. package/cpp/ggml-cpu.c +15766 -14485
  31. package/cpp/ggml-cpu.cpp +655 -633
  32. package/cpp/ggml-cpu.h +138 -135
  33. package/cpp/ggml-impl.h +567 -567
  34. package/cpp/ggml-metal-impl.h +235 -0
  35. package/cpp/ggml-metal.h +66 -66
  36. package/cpp/ggml-metal.m +5146 -5002
  37. package/cpp/ggml-opt.cpp +854 -854
  38. package/cpp/ggml-opt.h +216 -216
  39. package/cpp/ggml-quants.c +5238 -5238
  40. package/cpp/ggml-threading.h +14 -14
  41. package/cpp/ggml.c +6529 -6524
  42. package/cpp/ggml.h +2198 -2194
  43. package/cpp/gguf.cpp +1329 -1329
  44. package/cpp/gguf.h +202 -202
  45. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  46. package/cpp/json-schema-to-grammar.h +21 -22
  47. package/cpp/json.hpp +24766 -24766
  48. package/cpp/llama-adapter.cpp +347 -347
  49. package/cpp/llama-adapter.h +74 -74
  50. package/cpp/llama-arch.cpp +1513 -1492
  51. package/cpp/llama-arch.h +403 -402
  52. package/cpp/llama-batch.cpp +368 -368
  53. package/cpp/llama-batch.h +88 -88
  54. package/cpp/llama-chat.cpp +588 -587
  55. package/cpp/llama-chat.h +53 -53
  56. package/cpp/llama-context.cpp +1775 -1775
  57. package/cpp/llama-context.h +128 -128
  58. package/cpp/llama-cparams.cpp +1 -1
  59. package/cpp/llama-cparams.h +37 -37
  60. package/cpp/llama-cpp.h +30 -30
  61. package/cpp/llama-grammar.cpp +1219 -1219
  62. package/cpp/llama-grammar.h +173 -164
  63. package/cpp/llama-hparams.cpp +71 -71
  64. package/cpp/llama-hparams.h +139 -139
  65. package/cpp/llama-impl.cpp +167 -167
  66. package/cpp/llama-impl.h +61 -61
  67. package/cpp/llama-kv-cache.cpp +718 -718
  68. package/cpp/llama-kv-cache.h +219 -218
  69. package/cpp/llama-mmap.cpp +600 -590
  70. package/cpp/llama-mmap.h +68 -68
  71. package/cpp/llama-model-loader.cpp +1124 -1124
  72. package/cpp/llama-model-loader.h +167 -167
  73. package/cpp/llama-model.cpp +4087 -4023
  74. package/cpp/llama-model.h +370 -370
  75. package/cpp/llama-sampling.cpp +2558 -2525
  76. package/cpp/llama-sampling.h +32 -32
  77. package/cpp/llama-vocab.cpp +3264 -3252
  78. package/cpp/llama-vocab.h +125 -125
  79. package/cpp/llama.cpp +10284 -10137
  80. package/cpp/llama.h +1354 -1340
  81. package/cpp/log.cpp +393 -423
  82. package/cpp/log.h +132 -132
  83. package/cpp/minja/chat-template.hpp +529 -0
  84. package/cpp/minja/minja.hpp +2915 -0
  85. package/cpp/minja.hpp +2915 -2883
  86. package/cpp/rn-llama.cpp +20 -37
  87. package/cpp/rn-llama.h +12 -2
  88. package/cpp/sampling.cpp +570 -532
  89. package/cpp/sgemm.cpp +2598 -2598
  90. package/cpp/sgemm.h +14 -14
  91. package/cpp/speculative.cpp +278 -277
  92. package/cpp/speculative.h +28 -28
  93. package/package.json +1 -1
  94. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  95. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  96. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  97. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  98. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  99. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  100. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  101. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  102. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  103. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  104. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  105. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  106. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  107. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  108. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  109. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  110. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  111. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  112. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  113. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  114. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  115. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  116. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  117. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  118. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  119. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  120. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  122. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  124. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  125. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  126. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  127. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  128. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  129. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  130. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  131. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  132. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  133. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  134. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  135. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  136. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  194. package/android/src/main/build-arm64/Makefile +0 -1862
  195. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  196. package/cpp/chat.hpp +0 -55
  197. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-arch.h CHANGED
@@ -1,402 +1,403 @@
1
- #pragma once
2
-
3
- #include "ggml.h" // lm_ggml_op
4
-
5
- #include <string>
6
-
7
- //
8
- // gguf constants (sync with gguf.py)
9
- //
10
-
11
- enum llm_arch {
12
- LLM_ARCH_LLAMA,
13
- LLM_ARCH_DECI,
14
- LLM_ARCH_FALCON,
15
- LLM_ARCH_BAICHUAN,
16
- LLM_ARCH_GROK,
17
- LLM_ARCH_GPT2,
18
- LLM_ARCH_GPTJ,
19
- LLM_ARCH_GPTNEOX,
20
- LLM_ARCH_MPT,
21
- LLM_ARCH_STARCODER,
22
- LLM_ARCH_REFACT,
23
- LLM_ARCH_BERT,
24
- LLM_ARCH_NOMIC_BERT,
25
- LLM_ARCH_JINA_BERT_V2,
26
- LLM_ARCH_BLOOM,
27
- LLM_ARCH_STABLELM,
28
- LLM_ARCH_QWEN,
29
- LLM_ARCH_QWEN2,
30
- LLM_ARCH_QWEN2MOE,
31
- LLM_ARCH_QWEN2VL,
32
- LLM_ARCH_PHI2,
33
- LLM_ARCH_PHI3,
34
- LLM_ARCH_PHIMOE,
35
- LLM_ARCH_PLAMO,
36
- LLM_ARCH_CODESHELL,
37
- LLM_ARCH_ORION,
38
- LLM_ARCH_INTERNLM2,
39
- LLM_ARCH_MINICPM,
40
- LLM_ARCH_MINICPM3,
41
- LLM_ARCH_GEMMA,
42
- LLM_ARCH_GEMMA2,
43
- LLM_ARCH_STARCODER2,
44
- LLM_ARCH_MAMBA,
45
- LLM_ARCH_XVERSE,
46
- LLM_ARCH_COMMAND_R,
47
- LLM_ARCH_COHERE2,
48
- LLM_ARCH_DBRX,
49
- LLM_ARCH_OLMO,
50
- LLM_ARCH_OLMO2,
51
- LLM_ARCH_OLMOE,
52
- LLM_ARCH_OPENELM,
53
- LLM_ARCH_ARCTIC,
54
- LLM_ARCH_DEEPSEEK,
55
- LLM_ARCH_DEEPSEEK2,
56
- LLM_ARCH_CHATGLM,
57
- LLM_ARCH_BITNET,
58
- LLM_ARCH_T5,
59
- LLM_ARCH_T5ENCODER,
60
- LLM_ARCH_JAIS,
61
- LLM_ARCH_NEMOTRON,
62
- LLM_ARCH_EXAONE,
63
- LLM_ARCH_RWKV6,
64
- LLM_ARCH_RWKV6QWEN2,
65
- LLM_ARCH_GRANITE,
66
- LLM_ARCH_GRANITE_MOE,
67
- LLM_ARCH_CHAMELEON,
68
- LLM_ARCH_WAVTOKENIZER_DEC,
69
- LLM_ARCH_UNKNOWN,
70
- };
71
-
72
- enum llm_kv {
73
- LLM_KV_GENERAL_TYPE,
74
- LLM_KV_GENERAL_ARCHITECTURE,
75
- LLM_KV_GENERAL_QUANTIZATION_VERSION,
76
- LLM_KV_GENERAL_ALIGNMENT,
77
- LLM_KV_GENERAL_NAME,
78
- LLM_KV_GENERAL_AUTHOR,
79
- LLM_KV_GENERAL_VERSION,
80
- LLM_KV_GENERAL_URL,
81
- LLM_KV_GENERAL_DESCRIPTION,
82
- LLM_KV_GENERAL_LICENSE,
83
- LLM_KV_GENERAL_SOURCE_URL,
84
- LLM_KV_GENERAL_SOURCE_HF_REPO,
85
-
86
- LLM_KV_VOCAB_SIZE,
87
- LLM_KV_CONTEXT_LENGTH,
88
- LLM_KV_EMBEDDING_LENGTH,
89
- LLM_KV_FEATURES_LENGTH,
90
- LLM_KV_BLOCK_COUNT,
91
- LLM_KV_LEADING_DENSE_BLOCK_COUNT,
92
- LLM_KV_FEED_FORWARD_LENGTH,
93
- LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
94
- LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
95
- LLM_KV_USE_PARALLEL_RESIDUAL,
96
- LLM_KV_TENSOR_DATA_LAYOUT,
97
- LLM_KV_EXPERT_COUNT,
98
- LLM_KV_EXPERT_USED_COUNT,
99
- LLM_KV_EXPERT_SHARED_COUNT,
100
- LLM_KV_EXPERT_WEIGHTS_SCALE,
101
- LLM_KV_EXPERT_WEIGHTS_NORM,
102
- LLM_KV_EXPERT_GATING_FUNC,
103
- LLM_KV_POOLING_TYPE,
104
- LLM_KV_LOGIT_SCALE,
105
- LLM_KV_DECODER_START_TOKEN_ID,
106
- LLM_KV_ATTN_LOGIT_SOFTCAPPING,
107
- LLM_KV_FINAL_LOGIT_SOFTCAPPING,
108
- LLM_KV_SWIN_NORM,
109
- LLM_KV_RESCALE_EVERY_N_LAYERS,
110
- LLM_KV_TIME_MIX_EXTRA_DIM,
111
- LLM_KV_TIME_DECAY_EXTRA_DIM,
112
- LLM_KV_RESIDUAL_SCALE,
113
- LLM_KV_EMBEDDING_SCALE,
114
- LLM_KV_TOKEN_SHIFT_COUNT,
115
-
116
- LLM_KV_ATTENTION_HEAD_COUNT,
117
- LLM_KV_ATTENTION_HEAD_COUNT_KV,
118
- LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
119
- LLM_KV_ATTENTION_CLAMP_KQV,
120
- LLM_KV_ATTENTION_KEY_LENGTH,
121
- LLM_KV_ATTENTION_VALUE_LENGTH,
122
- LLM_KV_ATTENTION_LAYERNORM_EPS,
123
- LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
124
- LLM_KV_ATTENTION_GROUPNORM_EPS,
125
- LLM_KV_ATTENTION_GROUPNORM_GROUPS,
126
- LLM_KV_ATTENTION_CAUSAL,
127
- LLM_KV_ATTENTION_Q_LORA_RANK,
128
- LLM_KV_ATTENTION_KV_LORA_RANK,
129
- LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
130
- LLM_KV_ATTENTION_SLIDING_WINDOW,
131
- LLM_KV_ATTENTION_SCALE,
132
-
133
- LLM_KV_ROPE_DIMENSION_COUNT,
134
- LLM_KV_ROPE_DIMENSION_SECTIONS,
135
- LLM_KV_ROPE_FREQ_BASE,
136
- LLM_KV_ROPE_SCALE_LINEAR,
137
- LLM_KV_ROPE_SCALING_TYPE,
138
- LLM_KV_ROPE_SCALING_FACTOR,
139
- LLM_KV_ROPE_SCALING_ATTN_FACTOR,
140
- LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
141
- LLM_KV_ROPE_SCALING_FINETUNED,
142
- LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
143
-
144
- LLM_KV_SPLIT_NO,
145
- LLM_KV_SPLIT_COUNT,
146
- LLM_KV_SPLIT_TENSORS_COUNT,
147
-
148
- LLM_KV_SSM_INNER_SIZE,
149
- LLM_KV_SSM_CONV_KERNEL,
150
- LLM_KV_SSM_STATE_SIZE,
151
- LLM_KV_SSM_TIME_STEP_RANK,
152
- LLM_KV_SSM_DT_B_C_RMS,
153
-
154
- LLM_KV_WKV_HEAD_SIZE,
155
-
156
- LLM_KV_TOKENIZER_MODEL,
157
- LLM_KV_TOKENIZER_PRE,
158
- LLM_KV_TOKENIZER_LIST,
159
- LLM_KV_TOKENIZER_TOKEN_TYPE,
160
- LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
161
- LLM_KV_TOKENIZER_SCORES,
162
- LLM_KV_TOKENIZER_MERGES,
163
- LLM_KV_TOKENIZER_BOS_ID,
164
- LLM_KV_TOKENIZER_EOS_ID,
165
- LLM_KV_TOKENIZER_EOT_ID,
166
- LLM_KV_TOKENIZER_EOM_ID,
167
- LLM_KV_TOKENIZER_UNK_ID,
168
- LLM_KV_TOKENIZER_SEP_ID,
169
- LLM_KV_TOKENIZER_PAD_ID,
170
- LLM_KV_TOKENIZER_CLS_ID,
171
- LLM_KV_TOKENIZER_MASK_ID,
172
- LLM_KV_TOKENIZER_ADD_BOS,
173
- LLM_KV_TOKENIZER_ADD_EOS,
174
- LLM_KV_TOKENIZER_ADD_PREFIX,
175
- LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
176
- LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
177
- LLM_KV_TOKENIZER_HF_JSON,
178
- LLM_KV_TOKENIZER_RWKV,
179
- LLM_KV_TOKENIZER_CHAT_TEMPLATE,
180
- LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
181
- LLM_KV_TOKENIZER_FIM_PRE_ID,
182
- LLM_KV_TOKENIZER_FIM_SUF_ID,
183
- LLM_KV_TOKENIZER_FIM_MID_ID,
184
- LLM_KV_TOKENIZER_FIM_PAD_ID,
185
- LLM_KV_TOKENIZER_FIM_REP_ID,
186
- LLM_KV_TOKENIZER_FIM_SEP_ID,
187
-
188
- LLM_KV_ADAPTER_TYPE,
189
- LLM_KV_ADAPTER_LORA_ALPHA,
190
-
191
- LLM_KV_POSNET_EMBEDDING_LENGTH,
192
- LLM_KV_POSNET_BLOCK_COUNT,
193
-
194
- LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
195
- LLM_KV_CONVNEXT_BLOCK_COUNT,
196
-
197
- // deprecated:
198
- LLM_KV_TOKENIZER_PREFIX_ID,
199
- LLM_KV_TOKENIZER_SUFFIX_ID,
200
- LLM_KV_TOKENIZER_MIDDLE_ID,
201
- };
202
-
203
- enum llm_tensor {
204
- LLM_TENSOR_TOKEN_EMBD,
205
- LLM_TENSOR_TOKEN_EMBD_NORM,
206
- LLM_TENSOR_TOKEN_TYPES,
207
- LLM_TENSOR_POS_EMBD,
208
- LLM_TENSOR_OUTPUT,
209
- LLM_TENSOR_OUTPUT_NORM,
210
- LLM_TENSOR_ROPE_FREQS,
211
- LLM_TENSOR_ROPE_FACTORS_LONG,
212
- LLM_TENSOR_ROPE_FACTORS_SHORT,
213
- LLM_TENSOR_ATTN_Q,
214
- LLM_TENSOR_ATTN_K,
215
- LLM_TENSOR_ATTN_V,
216
- LLM_TENSOR_ATTN_QKV,
217
- LLM_TENSOR_ATTN_OUT,
218
- LLM_TENSOR_ATTN_NORM,
219
- LLM_TENSOR_ATTN_NORM_2,
220
- LLM_TENSOR_ATTN_OUT_NORM,
221
- LLM_TENSOR_ATTN_POST_NORM,
222
- LLM_TENSOR_ATTN_ROT_EMBD,
223
- LLM_TENSOR_FFN_GATE_INP,
224
- LLM_TENSOR_FFN_GATE_INP_SHEXP,
225
- LLM_TENSOR_FFN_NORM,
226
- LLM_TENSOR_FFN_POST_NORM,
227
- LLM_TENSOR_FFN_GATE,
228
- LLM_TENSOR_FFN_DOWN,
229
- LLM_TENSOR_FFN_UP,
230
- LLM_TENSOR_FFN_ACT,
231
- LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
232
- LLM_TENSOR_FFN_GATE_EXP,
233
- LLM_TENSOR_FFN_UP_EXP,
234
- LLM_TENSOR_FFN_NORM_EXPS,
235
- LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
236
- LLM_TENSOR_FFN_GATE_EXPS,
237
- LLM_TENSOR_FFN_UP_EXPS,
238
- LLM_TENSOR_FFN_DOWN_SHEXP,
239
- LLM_TENSOR_FFN_GATE_SHEXP,
240
- LLM_TENSOR_FFN_UP_SHEXP,
241
- LLM_TENSOR_FFN_EXP_PROBS_B,
242
- LLM_TENSOR_ATTN_Q_NORM,
243
- LLM_TENSOR_ATTN_K_NORM,
244
- LLM_TENSOR_LAYER_OUT_NORM,
245
- LLM_TENSOR_SSM_IN,
246
- LLM_TENSOR_SSM_CONV1D,
247
- LLM_TENSOR_SSM_X,
248
- LLM_TENSOR_SSM_DT,
249
- LLM_TENSOR_SSM_A,
250
- LLM_TENSOR_SSM_D,
251
- LLM_TENSOR_SSM_OUT,
252
- LLM_TENSOR_TIME_MIX_W1,
253
- LLM_TENSOR_TIME_MIX_W2,
254
- LLM_TENSOR_TIME_MIX_LERP_X,
255
- LLM_TENSOR_TIME_MIX_LERP_W,
256
- LLM_TENSOR_TIME_MIX_LERP_K,
257
- LLM_TENSOR_TIME_MIX_LERP_V,
258
- LLM_TENSOR_TIME_MIX_LERP_R,
259
- LLM_TENSOR_TIME_MIX_LERP_G,
260
- LLM_TENSOR_TIME_MIX_LERP_FUSED,
261
- LLM_TENSOR_TIME_MIX_FIRST,
262
- LLM_TENSOR_TIME_MIX_DECAY,
263
- LLM_TENSOR_TIME_MIX_DECAY_W1,
264
- LLM_TENSOR_TIME_MIX_DECAY_W2,
265
- LLM_TENSOR_TIME_MIX_KEY,
266
- LLM_TENSOR_TIME_MIX_VALUE,
267
- LLM_TENSOR_TIME_MIX_RECEPTANCE,
268
- LLM_TENSOR_TIME_MIX_GATE,
269
- LLM_TENSOR_TIME_MIX_LN,
270
- LLM_TENSOR_TIME_MIX_OUTPUT,
271
- LLM_TENSOR_CHANNEL_MIX_LERP_K,
272
- LLM_TENSOR_CHANNEL_MIX_LERP_R,
273
- LLM_TENSOR_CHANNEL_MIX_KEY,
274
- LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
275
- LLM_TENSOR_CHANNEL_MIX_VALUE,
276
- LLM_TENSOR_ATTN_Q_A,
277
- LLM_TENSOR_ATTN_Q_B,
278
- LLM_TENSOR_ATTN_KV_A_MQA,
279
- LLM_TENSOR_ATTN_KV_B,
280
- LLM_TENSOR_ATTN_Q_A_NORM,
281
- LLM_TENSOR_ATTN_KV_A_NORM,
282
- LLM_TENSOR_ATTN_SUB_NORM,
283
- LLM_TENSOR_FFN_SUB_NORM,
284
- LLM_TENSOR_DEC_ATTN_NORM,
285
- LLM_TENSOR_DEC_ATTN_Q,
286
- LLM_TENSOR_DEC_ATTN_K,
287
- LLM_TENSOR_DEC_ATTN_V,
288
- LLM_TENSOR_DEC_ATTN_OUT,
289
- LLM_TENSOR_DEC_ATTN_REL_B,
290
- LLM_TENSOR_DEC_CROSS_ATTN_NORM,
291
- LLM_TENSOR_DEC_CROSS_ATTN_Q,
292
- LLM_TENSOR_DEC_CROSS_ATTN_K,
293
- LLM_TENSOR_DEC_CROSS_ATTN_V,
294
- LLM_TENSOR_DEC_CROSS_ATTN_OUT,
295
- LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
296
- LLM_TENSOR_DEC_FFN_NORM,
297
- LLM_TENSOR_DEC_FFN_GATE,
298
- LLM_TENSOR_DEC_FFN_DOWN,
299
- LLM_TENSOR_DEC_FFN_UP,
300
- LLM_TENSOR_DEC_OUTPUT_NORM,
301
- LLM_TENSOR_ENC_ATTN_NORM,
302
- LLM_TENSOR_ENC_ATTN_Q,
303
- LLM_TENSOR_ENC_ATTN_K,
304
- LLM_TENSOR_ENC_ATTN_V,
305
- LLM_TENSOR_ENC_ATTN_OUT,
306
- LLM_TENSOR_ENC_ATTN_REL_B,
307
- LLM_TENSOR_ENC_FFN_NORM,
308
- LLM_TENSOR_ENC_FFN_GATE,
309
- LLM_TENSOR_ENC_FFN_DOWN,
310
- LLM_TENSOR_ENC_FFN_UP,
311
- LLM_TENSOR_ENC_OUTPUT_NORM,
312
- LLM_TENSOR_CLS,
313
- LLM_TENSOR_CLS_OUT,
314
- LLM_TENSOR_CONV1D,
315
- LLM_TENSOR_CONVNEXT_DW,
316
- LLM_TENSOR_CONVNEXT_NORM,
317
- LLM_TENSOR_CONVNEXT_PW1,
318
- LLM_TENSOR_CONVNEXT_PW2,
319
- LLM_TENSOR_CONVNEXT_GAMMA,
320
- LLM_TENSOR_POS_NET_CONV1,
321
- LLM_TENSOR_POS_NET_CONV2,
322
- LLM_TENSOR_POS_NET_NORM,
323
- LLM_TENSOR_POS_NET_NORM1,
324
- LLM_TENSOR_POS_NET_NORM2,
325
- LLM_TENSOR_POS_NET_ATTN_NORM,
326
- LLM_TENSOR_POS_NET_ATTN_Q,
327
- LLM_TENSOR_POS_NET_ATTN_K,
328
- LLM_TENSOR_POS_NET_ATTN_V,
329
- LLM_TENSOR_POS_NET_ATTN_OUT,
330
- };
331
-
332
- enum llm_tensor_layer {
333
- LLM_TENSOR_LAYER_INPUT,
334
- LLM_TENSOR_LAYER_REPEATING,
335
- LLM_TENSOR_LAYER_OUTPUT,
336
- };
337
-
338
- struct LLM_KV {
339
- LLM_KV(llm_arch arch, const char * suffix = nullptr);
340
-
341
- llm_arch arch;
342
- const char * suffix;
343
-
344
- std::string operator()(llm_kv kv) const;
345
- };
346
-
347
- // helper to handle gguf constants
348
- // usage:
349
- //
350
- // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
351
- //
352
- // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
353
- // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
354
- // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
355
- //
356
- struct LLM_TN_IMPL {
357
- const llm_arch arch;
358
- const llm_tensor tensor;
359
- const char * const suffix;
360
- const int bid;
361
- const int xid;
362
-
363
- std::string str() const;
364
-
365
- operator std::string() const {
366
- return str();
367
- }
368
-
369
- friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
370
- return str == tn.str();
371
- }
372
-
373
- friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
374
- return str != tn.str();
375
- }
376
- };
377
-
378
- struct LLM_TN {
379
- LLM_TN(llm_arch arch) : arch(arch) {}
380
-
381
- llm_arch arch;
382
-
383
- LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
384
- return { arch, tensor, suffix, bid, xid };
385
- }
386
-
387
- LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
388
- return { arch, tensor, nullptr, bid, xid };
389
- }
390
- };
391
-
392
-
393
- struct llm_tensor_info {
394
- llm_tensor_layer layer;
395
- lm_ggml_op op;
396
- };
397
-
398
- const char * llm_arch_name(llm_arch arch);
399
-
400
- llm_arch llm_arch_from_string(const std::string & name);
401
-
402
- const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
1
+ #pragma once
2
+
3
+ #include "ggml.h" // lm_ggml_op
4
+
5
+ #include <string>
6
+
7
+ //
8
+ // gguf constants (sync with gguf.py)
9
+ //
10
+
11
+ enum llm_arch {
12
+ LLM_ARCH_LLAMA,
13
+ LLM_ARCH_DECI,
14
+ LLM_ARCH_FALCON,
15
+ LLM_ARCH_BAICHUAN,
16
+ LLM_ARCH_GROK,
17
+ LLM_ARCH_GPT2,
18
+ LLM_ARCH_GPTJ,
19
+ LLM_ARCH_GPTNEOX,
20
+ LLM_ARCH_MPT,
21
+ LLM_ARCH_STARCODER,
22
+ LLM_ARCH_REFACT,
23
+ LLM_ARCH_BERT,
24
+ LLM_ARCH_NOMIC_BERT,
25
+ LLM_ARCH_JINA_BERT_V2,
26
+ LLM_ARCH_BLOOM,
27
+ LLM_ARCH_STABLELM,
28
+ LLM_ARCH_QWEN,
29
+ LLM_ARCH_QWEN2,
30
+ LLM_ARCH_QWEN2MOE,
31
+ LLM_ARCH_QWEN2VL,
32
+ LLM_ARCH_PHI2,
33
+ LLM_ARCH_PHI3,
34
+ LLM_ARCH_PHIMOE,
35
+ LLM_ARCH_PLAMO,
36
+ LLM_ARCH_CODESHELL,
37
+ LLM_ARCH_ORION,
38
+ LLM_ARCH_INTERNLM2,
39
+ LLM_ARCH_MINICPM,
40
+ LLM_ARCH_MINICPM3,
41
+ LLM_ARCH_GEMMA,
42
+ LLM_ARCH_GEMMA2,
43
+ LLM_ARCH_GEMMA3,
44
+ LLM_ARCH_STARCODER2,
45
+ LLM_ARCH_MAMBA,
46
+ LLM_ARCH_XVERSE,
47
+ LLM_ARCH_COMMAND_R,
48
+ LLM_ARCH_COHERE2,
49
+ LLM_ARCH_DBRX,
50
+ LLM_ARCH_OLMO,
51
+ LLM_ARCH_OLMO2,
52
+ LLM_ARCH_OLMOE,
53
+ LLM_ARCH_OPENELM,
54
+ LLM_ARCH_ARCTIC,
55
+ LLM_ARCH_DEEPSEEK,
56
+ LLM_ARCH_DEEPSEEK2,
57
+ LLM_ARCH_CHATGLM,
58
+ LLM_ARCH_BITNET,
59
+ LLM_ARCH_T5,
60
+ LLM_ARCH_T5ENCODER,
61
+ LLM_ARCH_JAIS,
62
+ LLM_ARCH_NEMOTRON,
63
+ LLM_ARCH_EXAONE,
64
+ LLM_ARCH_RWKV6,
65
+ LLM_ARCH_RWKV6QWEN2,
66
+ LLM_ARCH_GRANITE,
67
+ LLM_ARCH_GRANITE_MOE,
68
+ LLM_ARCH_CHAMELEON,
69
+ LLM_ARCH_WAVTOKENIZER_DEC,
70
+ LLM_ARCH_UNKNOWN,
71
+ };
72
+
73
+ enum llm_kv {
74
+ LLM_KV_GENERAL_TYPE,
75
+ LLM_KV_GENERAL_ARCHITECTURE,
76
+ LLM_KV_GENERAL_QUANTIZATION_VERSION,
77
+ LLM_KV_GENERAL_ALIGNMENT,
78
+ LLM_KV_GENERAL_NAME,
79
+ LLM_KV_GENERAL_AUTHOR,
80
+ LLM_KV_GENERAL_VERSION,
81
+ LLM_KV_GENERAL_URL,
82
+ LLM_KV_GENERAL_DESCRIPTION,
83
+ LLM_KV_GENERAL_LICENSE,
84
+ LLM_KV_GENERAL_SOURCE_URL,
85
+ LLM_KV_GENERAL_SOURCE_HF_REPO,
86
+
87
+ LLM_KV_VOCAB_SIZE,
88
+ LLM_KV_CONTEXT_LENGTH,
89
+ LLM_KV_EMBEDDING_LENGTH,
90
+ LLM_KV_FEATURES_LENGTH,
91
+ LLM_KV_BLOCK_COUNT,
92
+ LLM_KV_LEADING_DENSE_BLOCK_COUNT,
93
+ LLM_KV_FEED_FORWARD_LENGTH,
94
+ LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
95
+ LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
96
+ LLM_KV_USE_PARALLEL_RESIDUAL,
97
+ LLM_KV_TENSOR_DATA_LAYOUT,
98
+ LLM_KV_EXPERT_COUNT,
99
+ LLM_KV_EXPERT_USED_COUNT,
100
+ LLM_KV_EXPERT_SHARED_COUNT,
101
+ LLM_KV_EXPERT_WEIGHTS_SCALE,
102
+ LLM_KV_EXPERT_WEIGHTS_NORM,
103
+ LLM_KV_EXPERT_GATING_FUNC,
104
+ LLM_KV_POOLING_TYPE,
105
+ LLM_KV_LOGIT_SCALE,
106
+ LLM_KV_DECODER_START_TOKEN_ID,
107
+ LLM_KV_ATTN_LOGIT_SOFTCAPPING,
108
+ LLM_KV_FINAL_LOGIT_SOFTCAPPING,
109
+ LLM_KV_SWIN_NORM,
110
+ LLM_KV_RESCALE_EVERY_N_LAYERS,
111
+ LLM_KV_TIME_MIX_EXTRA_DIM,
112
+ LLM_KV_TIME_DECAY_EXTRA_DIM,
113
+ LLM_KV_RESIDUAL_SCALE,
114
+ LLM_KV_EMBEDDING_SCALE,
115
+ LLM_KV_TOKEN_SHIFT_COUNT,
116
+
117
+ LLM_KV_ATTENTION_HEAD_COUNT,
118
+ LLM_KV_ATTENTION_HEAD_COUNT_KV,
119
+ LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
120
+ LLM_KV_ATTENTION_CLAMP_KQV,
121
+ LLM_KV_ATTENTION_KEY_LENGTH,
122
+ LLM_KV_ATTENTION_VALUE_LENGTH,
123
+ LLM_KV_ATTENTION_LAYERNORM_EPS,
124
+ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
125
+ LLM_KV_ATTENTION_GROUPNORM_EPS,
126
+ LLM_KV_ATTENTION_GROUPNORM_GROUPS,
127
+ LLM_KV_ATTENTION_CAUSAL,
128
+ LLM_KV_ATTENTION_Q_LORA_RANK,
129
+ LLM_KV_ATTENTION_KV_LORA_RANK,
130
+ LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
131
+ LLM_KV_ATTENTION_SLIDING_WINDOW,
132
+ LLM_KV_ATTENTION_SCALE,
133
+
134
+ LLM_KV_ROPE_DIMENSION_COUNT,
135
+ LLM_KV_ROPE_DIMENSION_SECTIONS,
136
+ LLM_KV_ROPE_FREQ_BASE,
137
+ LLM_KV_ROPE_SCALE_LINEAR,
138
+ LLM_KV_ROPE_SCALING_TYPE,
139
+ LLM_KV_ROPE_SCALING_FACTOR,
140
+ LLM_KV_ROPE_SCALING_ATTN_FACTOR,
141
+ LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
142
+ LLM_KV_ROPE_SCALING_FINETUNED,
143
+ LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
144
+
145
+ LLM_KV_SPLIT_NO,
146
+ LLM_KV_SPLIT_COUNT,
147
+ LLM_KV_SPLIT_TENSORS_COUNT,
148
+
149
+ LLM_KV_SSM_INNER_SIZE,
150
+ LLM_KV_SSM_CONV_KERNEL,
151
+ LLM_KV_SSM_STATE_SIZE,
152
+ LLM_KV_SSM_TIME_STEP_RANK,
153
+ LLM_KV_SSM_DT_B_C_RMS,
154
+
155
+ LLM_KV_WKV_HEAD_SIZE,
156
+
157
+ LLM_KV_TOKENIZER_MODEL,
158
+ LLM_KV_TOKENIZER_PRE,
159
+ LLM_KV_TOKENIZER_LIST,
160
+ LLM_KV_TOKENIZER_TOKEN_TYPE,
161
+ LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
162
+ LLM_KV_TOKENIZER_SCORES,
163
+ LLM_KV_TOKENIZER_MERGES,
164
+ LLM_KV_TOKENIZER_BOS_ID,
165
+ LLM_KV_TOKENIZER_EOS_ID,
166
+ LLM_KV_TOKENIZER_EOT_ID,
167
+ LLM_KV_TOKENIZER_EOM_ID,
168
+ LLM_KV_TOKENIZER_UNK_ID,
169
+ LLM_KV_TOKENIZER_SEP_ID,
170
+ LLM_KV_TOKENIZER_PAD_ID,
171
+ LLM_KV_TOKENIZER_CLS_ID,
172
+ LLM_KV_TOKENIZER_MASK_ID,
173
+ LLM_KV_TOKENIZER_ADD_BOS,
174
+ LLM_KV_TOKENIZER_ADD_EOS,
175
+ LLM_KV_TOKENIZER_ADD_PREFIX,
176
+ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
177
+ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
178
+ LLM_KV_TOKENIZER_HF_JSON,
179
+ LLM_KV_TOKENIZER_RWKV,
180
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE,
181
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
182
+ LLM_KV_TOKENIZER_FIM_PRE_ID,
183
+ LLM_KV_TOKENIZER_FIM_SUF_ID,
184
+ LLM_KV_TOKENIZER_FIM_MID_ID,
185
+ LLM_KV_TOKENIZER_FIM_PAD_ID,
186
+ LLM_KV_TOKENIZER_FIM_REP_ID,
187
+ LLM_KV_TOKENIZER_FIM_SEP_ID,
188
+
189
+ LLM_KV_ADAPTER_TYPE,
190
+ LLM_KV_ADAPTER_LORA_ALPHA,
191
+
192
+ LLM_KV_POSNET_EMBEDDING_LENGTH,
193
+ LLM_KV_POSNET_BLOCK_COUNT,
194
+
195
+ LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
196
+ LLM_KV_CONVNEXT_BLOCK_COUNT,
197
+
198
+ // deprecated:
199
+ LLM_KV_TOKENIZER_PREFIX_ID,
200
+ LLM_KV_TOKENIZER_SUFFIX_ID,
201
+ LLM_KV_TOKENIZER_MIDDLE_ID,
202
+ };
203
+
204
+ enum llm_tensor { // tensor identifiers; LLM_TN::operator() takes one of these and llm_tensor_info_for() looks up its llm_tensor_info
205
+ LLM_TENSOR_TOKEN_EMBD, // no enumerator has an explicit value, so numbering starts at 0 here and follows declaration order
206
+ LLM_TENSOR_TOKEN_EMBD_NORM,
207
+ LLM_TENSOR_TOKEN_TYPES,
208
+ LLM_TENSOR_POS_EMBD,
209
+ LLM_TENSOR_OUTPUT,
210
+ LLM_TENSOR_OUTPUT_NORM,
211
+ LLM_TENSOR_ROPE_FREQS,
212
+ LLM_TENSOR_ROPE_FACTORS_LONG,
213
+ LLM_TENSOR_ROPE_FACTORS_SHORT,
214
+ LLM_TENSOR_ATTN_Q,
215
+ LLM_TENSOR_ATTN_K,
216
+ LLM_TENSOR_ATTN_V,
217
+ LLM_TENSOR_ATTN_QKV,
218
+ LLM_TENSOR_ATTN_OUT,
219
+ LLM_TENSOR_ATTN_NORM,
220
+ LLM_TENSOR_ATTN_NORM_2,
221
+ LLM_TENSOR_ATTN_OUT_NORM,
222
+ LLM_TENSOR_ATTN_POST_NORM,
223
+ LLM_TENSOR_ATTN_ROT_EMBD,
224
+ LLM_TENSOR_FFN_GATE_INP,
225
+ LLM_TENSOR_FFN_GATE_INP_SHEXP,
226
+ LLM_TENSOR_FFN_NORM,
227
+ LLM_TENSOR_FFN_POST_NORM,
228
+ LLM_TENSOR_FFN_GATE,
229
+ LLM_TENSOR_FFN_DOWN,
230
+ LLM_TENSOR_FFN_UP,
231
+ LLM_TENSOR_FFN_ACT,
232
+ LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
233
+ LLM_TENSOR_FFN_GATE_EXP,
234
+ LLM_TENSOR_FFN_UP_EXP,
235
+ LLM_TENSOR_FFN_NORM_EXPS,
236
+ LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
237
+ LLM_TENSOR_FFN_GATE_EXPS,
238
+ LLM_TENSOR_FFN_UP_EXPS,
239
+ LLM_TENSOR_FFN_DOWN_SHEXP,
240
+ LLM_TENSOR_FFN_GATE_SHEXP,
241
+ LLM_TENSOR_FFN_UP_SHEXP,
242
+ LLM_TENSOR_FFN_EXP_PROBS_B,
243
+ LLM_TENSOR_ATTN_Q_NORM,
244
+ LLM_TENSOR_ATTN_K_NORM,
245
+ LLM_TENSOR_LAYER_OUT_NORM,
246
+ LLM_TENSOR_SSM_IN, // SSM_* group; NOTE(review): presumably pairs with the LLM_KV_SSM_* keys in llm_kv — confirm
247
+ LLM_TENSOR_SSM_CONV1D,
248
+ LLM_TENSOR_SSM_X,
249
+ LLM_TENSOR_SSM_DT,
250
+ LLM_TENSOR_SSM_A,
251
+ LLM_TENSOR_SSM_D,
252
+ LLM_TENSOR_SSM_OUT,
253
+ LLM_TENSOR_TIME_MIX_W1,
254
+ LLM_TENSOR_TIME_MIX_W2,
255
+ LLM_TENSOR_TIME_MIX_LERP_X,
256
+ LLM_TENSOR_TIME_MIX_LERP_W,
257
+ LLM_TENSOR_TIME_MIX_LERP_K,
258
+ LLM_TENSOR_TIME_MIX_LERP_V,
259
+ LLM_TENSOR_TIME_MIX_LERP_R,
260
+ LLM_TENSOR_TIME_MIX_LERP_G,
261
+ LLM_TENSOR_TIME_MIX_LERP_FUSED,
262
+ LLM_TENSOR_TIME_MIX_FIRST,
263
+ LLM_TENSOR_TIME_MIX_DECAY,
264
+ LLM_TENSOR_TIME_MIX_DECAY_W1,
265
+ LLM_TENSOR_TIME_MIX_DECAY_W2,
266
+ LLM_TENSOR_TIME_MIX_KEY,
267
+ LLM_TENSOR_TIME_MIX_VALUE,
268
+ LLM_TENSOR_TIME_MIX_RECEPTANCE,
269
+ LLM_TENSOR_TIME_MIX_GATE,
270
+ LLM_TENSOR_TIME_MIX_LN,
271
+ LLM_TENSOR_TIME_MIX_OUTPUT,
272
+ LLM_TENSOR_CHANNEL_MIX_LERP_K,
273
+ LLM_TENSOR_CHANNEL_MIX_LERP_R,
274
+ LLM_TENSOR_CHANNEL_MIX_KEY,
275
+ LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
276
+ LLM_TENSOR_CHANNEL_MIX_VALUE,
277
+ LLM_TENSOR_ATTN_Q_A,
278
+ LLM_TENSOR_ATTN_Q_B,
279
+ LLM_TENSOR_ATTN_KV_A_MQA,
280
+ LLM_TENSOR_ATTN_KV_B,
281
+ LLM_TENSOR_ATTN_Q_A_NORM,
282
+ LLM_TENSOR_ATTN_KV_A_NORM,
283
+ LLM_TENSOR_ATTN_SUB_NORM,
284
+ LLM_TENSOR_FFN_SUB_NORM,
285
+ LLM_TENSOR_DEC_ATTN_NORM, // DEC_* group; NOTE(review): presumably decoder-side tensors — confirm
286
+ LLM_TENSOR_DEC_ATTN_Q,
287
+ LLM_TENSOR_DEC_ATTN_K,
288
+ LLM_TENSOR_DEC_ATTN_V,
289
+ LLM_TENSOR_DEC_ATTN_OUT,
290
+ LLM_TENSOR_DEC_ATTN_REL_B,
291
+ LLM_TENSOR_DEC_CROSS_ATTN_NORM,
292
+ LLM_TENSOR_DEC_CROSS_ATTN_Q,
293
+ LLM_TENSOR_DEC_CROSS_ATTN_K,
294
+ LLM_TENSOR_DEC_CROSS_ATTN_V,
295
+ LLM_TENSOR_DEC_CROSS_ATTN_OUT,
296
+ LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
297
+ LLM_TENSOR_DEC_FFN_NORM,
298
+ LLM_TENSOR_DEC_FFN_GATE,
299
+ LLM_TENSOR_DEC_FFN_DOWN,
300
+ LLM_TENSOR_DEC_FFN_UP,
301
+ LLM_TENSOR_DEC_OUTPUT_NORM,
302
+ LLM_TENSOR_ENC_ATTN_NORM, // ENC_* group; NOTE(review): presumably encoder-side tensors — confirm
303
+ LLM_TENSOR_ENC_ATTN_Q,
304
+ LLM_TENSOR_ENC_ATTN_K,
305
+ LLM_TENSOR_ENC_ATTN_V,
306
+ LLM_TENSOR_ENC_ATTN_OUT,
307
+ LLM_TENSOR_ENC_ATTN_REL_B,
308
+ LLM_TENSOR_ENC_FFN_NORM,
309
+ LLM_TENSOR_ENC_FFN_GATE,
310
+ LLM_TENSOR_ENC_FFN_DOWN,
311
+ LLM_TENSOR_ENC_FFN_UP,
312
+ LLM_TENSOR_ENC_OUTPUT_NORM,
313
+ LLM_TENSOR_CLS,
314
+ LLM_TENSOR_CLS_OUT,
315
+ LLM_TENSOR_CONV1D,
316
+ LLM_TENSOR_CONVNEXT_DW, // CONVNEXT_* group; NOTE(review): presumably pairs with the LLM_KV_CONVNEXT_* keys in llm_kv — confirm
317
+ LLM_TENSOR_CONVNEXT_NORM,
318
+ LLM_TENSOR_CONVNEXT_PW1,
319
+ LLM_TENSOR_CONVNEXT_PW2,
320
+ LLM_TENSOR_CONVNEXT_GAMMA,
321
+ LLM_TENSOR_POS_NET_CONV1, // POS_NET_* group; NOTE(review): presumably pairs with the LLM_KV_POSNET_* keys in llm_kv — confirm
322
+ LLM_TENSOR_POS_NET_CONV2,
323
+ LLM_TENSOR_POS_NET_NORM,
324
+ LLM_TENSOR_POS_NET_NORM1,
325
+ LLM_TENSOR_POS_NET_NORM2,
326
+ LLM_TENSOR_POS_NET_ATTN_NORM,
327
+ LLM_TENSOR_POS_NET_ATTN_Q,
328
+ LLM_TENSOR_POS_NET_ATTN_K,
329
+ LLM_TENSOR_POS_NET_ATTN_V,
330
+ LLM_TENSOR_POS_NET_ATTN_OUT,
331
+ };
332
+
333
+ enum llm_tensor_layer { // layer category of a tensor; stored in llm_tensor_info::layer
334
+ LLM_TENSOR_LAYER_INPUT, // NOTE(review): presumably tensors of the input stage — confirm
335
+ LLM_TENSOR_LAYER_REPEATING, // NOTE(review): presumably tensors that occur once per repeated block — confirm
336
+ LLM_TENSOR_LAYER_OUTPUT, // NOTE(review): presumably tensors of the output stage — confirm
337
+ };
338
+
339
+ struct LLM_KV { // callable helper: operator() below turns an llm_kv id into a std::string (definitions are not in this section)
340
+ LLM_KV(llm_arch arch, const char * suffix = nullptr); // suffix is optional and defaults to nullptr
341
+
342
+ llm_arch arch;
343
+ const char * suffix; // raw pointer member, not a copy of the characters; NOTE(review): presumably the caller must keep the string alive — confirm
344
+
345
+ std::string operator()(llm_kv kv) const; // const member: callable on a const LLM_KV; returns the string by value
346
+ };
347
+
348
+ // helper to handle gguf constants
349
+ // usage:
350
+ //
351
+ // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
352
+ //
353
+ // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
354
+ // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
355
+ // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
356
+ //
357
+ struct LLM_TN_IMPL { // value returned by LLM_TN::operator(); carries everything str() is given to produce a tensor name
358
+ const llm_arch arch;
359
+ const llm_tensor tensor;
360
+ const char * const suffix; // may be nullptr: the suffix-less LLM_TN::operator() overload passes nullptr here
361
+ const int bid; // block index; LLM_TN defaults it to -1, and the usage comment above shows bid 3 giving "blk.3.attn_norm.weight"
362
+ const int xid; // second index, also defaulted to -1 by LLM_TN; NOTE(review): presumably an expert index — confirm against str()
363
+
364
+ std::string str() const; // declared only; the definition is not in this section
365
+
366
+ operator std::string() const { // implicit conversion, so the result of tn(...) can initialise a std::string directly
367
+ return str();
368
+ }
369
+
370
+ friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) { // only the (std::string, LLM_TN_IMPL) operand order is declared here
371
+ return str == tn.str(); // calls str() on every comparison
372
+ }
373
+
374
+ friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) { // same operand order as operator== above
375
+ return str != tn.str();
376
+ }
377
+ };
378
+
379
+ struct LLM_TN { // factory bound to one architecture; each call packs its arguments into an LLM_TN_IMPL
380
+ LLM_TN(llm_arch arch) : arch(arch) {} // not marked explicit, so an llm_arch converts implicitly to LLM_TN
381
+
382
+ llm_arch arch;
383
+
384
+ LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const { // overload with a suffix such as "weight" or "bias"
385
+ return { arch, tensor, suffix, bid, xid }; // aggregate-initialises the fields in declaration order; the suffix pointer is stored as-is, not copied
386
+ }
387
+
388
+ LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const { // overload without a suffix
389
+ return { arch, tensor, nullptr, bid, xid }; // nullptr stands in for the missing suffix
390
+ }
391
+ };
392
+
393
+
394
+ struct llm_tensor_info { // per-tensor record returned by llm_tensor_info_for()
395
+ llm_tensor_layer layer; // one of the llm_tensor_layer categories
396
+ lm_ggml_op op; // NOTE(review): presumably the ggml operation the tensor is used with — confirm
397
+ };
398
+
399
+ const char * llm_arch_name(llm_arch arch); // architecture id -> C string; definition and string lifetime are not visible in this section
400
+
401
+ llm_arch llm_arch_from_string(const std::string & name); // name -> architecture id; NOTE(review): behaviour for an unrecognised name is not visible here — confirm
402
+
403
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor); // returns a reference, not a copy; NOTE(review): presumably refers to a long-lived table entry — confirm