cui-llama.rn 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. package/android/src/main/CMakeLists.txt +9 -2
  2. package/android/src/main/jni.cpp +54 -34
  3. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  11. package/cpp/binary-ops.cpp +158 -0
  12. package/cpp/binary-ops.h +16 -0
  13. package/cpp/chat.cpp +1769 -1085
  14. package/cpp/chat.h +143 -0
  15. package/cpp/common.cpp +1562 -1996
  16. package/cpp/common.h +677 -744
  17. package/cpp/cpu-common.h +72 -0
  18. package/cpp/ggml-alloc.c +1039 -1030
  19. package/cpp/ggml-alloc.h +1 -1
  20. package/cpp/ggml-backend-impl.h +255 -255
  21. package/cpp/ggml-backend-reg.cpp +586 -582
  22. package/cpp/ggml-backend.cpp +2004 -2002
  23. package/cpp/ggml-backend.h +354 -354
  24. package/cpp/ggml-common.h +1857 -1851
  25. package/cpp/ggml-cpp.h +39 -39
  26. package/cpp/ggml-cpu-aarch64.cpp +5725 -4247
  27. package/cpp/ggml-cpu-aarch64.h +8 -8
  28. package/cpp/ggml-cpu-impl.h +512 -380
  29. package/cpp/ggml-cpu-quants.c +13026 -11517
  30. package/cpp/ggml-cpu-traits.cpp +36 -36
  31. package/cpp/ggml-cpu-traits.h +38 -38
  32. package/cpp/ggml-cpu.c +3438 -14485
  33. package/cpp/ggml-cpu.cpp +655 -633
  34. package/cpp/ggml-cpu.h +138 -135
  35. package/cpp/ggml-impl.h +594 -567
  36. package/cpp/ggml-metal-impl.h +312 -3
  37. package/cpp/ggml-metal.h +66 -66
  38. package/cpp/ggml-metal.m +5360 -5002
  39. package/cpp/ggml-opt.cpp +854 -854
  40. package/cpp/ggml-opt.h +216 -216
  41. package/cpp/ggml-quants.c +5238 -5238
  42. package/cpp/ggml-threading.h +14 -14
  43. package/cpp/ggml.c +6618 -6524
  44. package/cpp/ggml.h +2222 -2194
  45. package/cpp/gguf.cpp +1330 -1329
  46. package/cpp/gguf.h +202 -202
  47. package/cpp/json-schema-to-grammar.cpp +1024 -1025
  48. package/cpp/json-schema-to-grammar.h +21 -22
  49. package/cpp/json.hpp +24766 -24766
  50. package/cpp/llama-adapter.cpp +382 -347
  51. package/cpp/llama-adapter.h +76 -74
  52. package/cpp/llama-arch.cpp +1714 -1492
  53. package/cpp/llama-arch.h +428 -402
  54. package/cpp/llama-batch.cpp +368 -368
  55. package/cpp/llama-batch.h +88 -88
  56. package/cpp/llama-chat.cpp +640 -587
  57. package/cpp/llama-chat.h +56 -53
  58. package/cpp/llama-context.cpp +2831 -1775
  59. package/cpp/llama-context.h +265 -128
  60. package/cpp/llama-cparams.cpp +1 -1
  61. package/cpp/llama-cparams.h +38 -37
  62. package/cpp/llama-cpp.h +30 -30
  63. package/cpp/llama-grammar.cpp +1219 -1219
  64. package/cpp/llama-grammar.h +173 -164
  65. package/cpp/llama-graph.cpp +1695 -0
  66. package/cpp/llama-graph.h +592 -0
  67. package/cpp/llama-hparams.cpp +79 -71
  68. package/cpp/llama-hparams.h +156 -139
  69. package/cpp/llama-impl.cpp +167 -167
  70. package/cpp/llama-impl.h +61 -61
  71. package/cpp/llama-io.cpp +15 -0
  72. package/cpp/llama-io.h +35 -0
  73. package/cpp/llama-kv-cache.cpp +1380 -718
  74. package/cpp/llama-kv-cache.h +213 -218
  75. package/cpp/llama-memory.cpp +1 -0
  76. package/cpp/llama-memory.h +21 -0
  77. package/cpp/llama-mmap.cpp +600 -590
  78. package/cpp/llama-mmap.h +68 -68
  79. package/cpp/llama-model-loader.cpp +1129 -1124
  80. package/cpp/llama-model-loader.h +169 -167
  81. package/cpp/llama-model.cpp +13080 -4023
  82. package/cpp/llama-model.h +409 -370
  83. package/cpp/llama-sampling.cpp +2563 -2525
  84. package/cpp/llama-sampling.h +32 -32
  85. package/cpp/llama-vocab.cpp +3295 -3252
  86. package/cpp/llama-vocab.h +125 -125
  87. package/cpp/llama.cpp +351 -10137
  88. package/cpp/llama.h +1434 -1340
  89. package/cpp/log.cpp +427 -423
  90. package/cpp/log.h +132 -132
  91. package/cpp/{chat-template.hpp → minja/chat-template.hpp} +537 -529
  92. package/cpp/{minja.hpp → minja/minja.hpp} +2941 -2883
  93. package/cpp/ops.cpp +8723 -0
  94. package/cpp/ops.h +128 -0
  95. package/cpp/rn-llama.cpp +45 -71
  96. package/cpp/rn-llama.h +3 -3
  97. package/cpp/sampling.cpp +573 -532
  98. package/cpp/sgemm.cpp +3043 -2598
  99. package/cpp/sgemm.h +14 -14
  100. package/cpp/simd-mappings.h +888 -0
  101. package/cpp/speculative.cpp +278 -277
  102. package/cpp/speculative.h +28 -28
  103. package/cpp/unary-ops.cpp +186 -0
  104. package/cpp/unary-ops.h +28 -0
  105. package/cpp/vec.cpp +258 -0
  106. package/cpp/vec.h +802 -0
  107. package/ios/CMakeLists.txt +5 -2
  108. package/ios/RNLlama.mm +2 -2
  109. package/ios/RNLlamaContext.mm +40 -24
  110. package/package.json +1 -1
  111. package/src/NativeRNLlama.ts +6 -4
  112. package/src/index.ts +3 -1
  113. package/android/src/main/build-arm64/CMakeCache.txt +0 -429
  114. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +0 -81
  115. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +0 -101
  116. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  117. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  118. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeSystem.cmake +0 -15
  119. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.c +0 -904
  120. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdC/CMakeCCompilerId.o +0 -0
  121. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.cpp +0 -919
  122. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CompilerIdCXX/CMakeCXXCompilerId.o +0 -0
  123. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +0 -431
  124. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +0 -16
  125. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +0 -165
  126. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +0 -297
  127. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +0 -1
  128. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +0 -1
  129. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +0 -1
  130. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +0 -1
  131. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +0 -1
  132. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +0 -1
  133. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +0 -1
  134. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +0 -8
  135. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +0 -1
  136. package/android/src/main/build-arm64/CMakeFiles/progress.marks +0 -1
  137. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  138. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +0 -58
  139. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  140. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +0 -756
  141. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  142. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +0 -709
  143. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  144. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +0 -714
  145. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  146. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +0 -62
  147. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  148. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +0 -708
  149. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  150. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +0 -113
  151. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  152. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +0 -713
  153. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  154. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +0 -763
  155. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  156. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +0 -61
  157. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  158. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +0 -707
  159. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  160. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +0 -104
  161. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  162. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +0 -714
  163. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  164. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +0 -723
  165. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +0 -62
  166. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +0 -722
  167. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +0 -89
  168. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +0 -2
  169. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +0 -2
  170. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +0 -2
  171. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +0 -17
  172. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +0 -41
  173. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +0 -62
  174. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +0 -722
  175. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +0 -89
  176. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +0 -2
  177. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +0 -2
  178. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +0 -2
  179. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +0 -17
  180. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +0 -41
  181. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +0 -62
  182. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +0 -722
  183. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +0 -89
  184. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +0 -2
  185. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +0 -2
  186. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +0 -2
  187. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +0 -17
  188. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +0 -41
  189. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +0 -62
  190. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +0 -722
  191. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +0 -89
  192. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +0 -2
  193. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +0 -2
  194. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +0 -2
  195. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +0 -17
  196. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +0 -41
  197. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +0 -62
  198. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +0 -722
  199. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +0 -89
  200. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +0 -2
  201. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +0 -2
  202. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +0 -2
  203. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +0 -17
  204. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +0 -41
  205. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +0 -62
  206. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +0 -722
  207. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +0 -89
  208. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +0 -2
  209. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +0 -2
  210. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +0 -2
  211. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +0 -17
  212. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +0 -41
  213. package/android/src/main/build-arm64/Makefile +0 -1862
  214. package/android/src/main/build-arm64/cmake_install.cmake +0 -66
  215. package/cpp/chat.hpp +0 -55
  216. package/cpp/rn-llama.hpp +0 -913
package/cpp/llama-arch.h CHANGED
@@ -1,402 +1,428 @@
1
- #pragma once
2
-
3
- #include "ggml.h" // lm_ggml_op
4
-
5
- #include <string>
6
-
7
- //
8
- // gguf constants (sync with gguf.py)
9
- //
10
-
11
- enum llm_arch {
12
- LLM_ARCH_LLAMA,
13
- LLM_ARCH_DECI,
14
- LLM_ARCH_FALCON,
15
- LLM_ARCH_BAICHUAN,
16
- LLM_ARCH_GROK,
17
- LLM_ARCH_GPT2,
18
- LLM_ARCH_GPTJ,
19
- LLM_ARCH_GPTNEOX,
20
- LLM_ARCH_MPT,
21
- LLM_ARCH_STARCODER,
22
- LLM_ARCH_REFACT,
23
- LLM_ARCH_BERT,
24
- LLM_ARCH_NOMIC_BERT,
25
- LLM_ARCH_JINA_BERT_V2,
26
- LLM_ARCH_BLOOM,
27
- LLM_ARCH_STABLELM,
28
- LLM_ARCH_QWEN,
29
- LLM_ARCH_QWEN2,
30
- LLM_ARCH_QWEN2MOE,
31
- LLM_ARCH_QWEN2VL,
32
- LLM_ARCH_PHI2,
33
- LLM_ARCH_PHI3,
34
- LLM_ARCH_PHIMOE,
35
- LLM_ARCH_PLAMO,
36
- LLM_ARCH_CODESHELL,
37
- LLM_ARCH_ORION,
38
- LLM_ARCH_INTERNLM2,
39
- LLM_ARCH_MINICPM,
40
- LLM_ARCH_MINICPM3,
41
- LLM_ARCH_GEMMA,
42
- LLM_ARCH_GEMMA2,
43
- LLM_ARCH_STARCODER2,
44
- LLM_ARCH_MAMBA,
45
- LLM_ARCH_XVERSE,
46
- LLM_ARCH_COMMAND_R,
47
- LLM_ARCH_COHERE2,
48
- LLM_ARCH_DBRX,
49
- LLM_ARCH_OLMO,
50
- LLM_ARCH_OLMO2,
51
- LLM_ARCH_OLMOE,
52
- LLM_ARCH_OPENELM,
53
- LLM_ARCH_ARCTIC,
54
- LLM_ARCH_DEEPSEEK,
55
- LLM_ARCH_DEEPSEEK2,
56
- LLM_ARCH_CHATGLM,
57
- LLM_ARCH_BITNET,
58
- LLM_ARCH_T5,
59
- LLM_ARCH_T5ENCODER,
60
- LLM_ARCH_JAIS,
61
- LLM_ARCH_NEMOTRON,
62
- LLM_ARCH_EXAONE,
63
- LLM_ARCH_RWKV6,
64
- LLM_ARCH_RWKV6QWEN2,
65
- LLM_ARCH_GRANITE,
66
- LLM_ARCH_GRANITE_MOE,
67
- LLM_ARCH_CHAMELEON,
68
- LLM_ARCH_WAVTOKENIZER_DEC,
69
- LLM_ARCH_UNKNOWN,
70
- };
71
-
72
- enum llm_kv {
73
- LLM_KV_GENERAL_TYPE,
74
- LLM_KV_GENERAL_ARCHITECTURE,
75
- LLM_KV_GENERAL_QUANTIZATION_VERSION,
76
- LLM_KV_GENERAL_ALIGNMENT,
77
- LLM_KV_GENERAL_NAME,
78
- LLM_KV_GENERAL_AUTHOR,
79
- LLM_KV_GENERAL_VERSION,
80
- LLM_KV_GENERAL_URL,
81
- LLM_KV_GENERAL_DESCRIPTION,
82
- LLM_KV_GENERAL_LICENSE,
83
- LLM_KV_GENERAL_SOURCE_URL,
84
- LLM_KV_GENERAL_SOURCE_HF_REPO,
85
-
86
- LLM_KV_VOCAB_SIZE,
87
- LLM_KV_CONTEXT_LENGTH,
88
- LLM_KV_EMBEDDING_LENGTH,
89
- LLM_KV_FEATURES_LENGTH,
90
- LLM_KV_BLOCK_COUNT,
91
- LLM_KV_LEADING_DENSE_BLOCK_COUNT,
92
- LLM_KV_FEED_FORWARD_LENGTH,
93
- LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
94
- LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
95
- LLM_KV_USE_PARALLEL_RESIDUAL,
96
- LLM_KV_TENSOR_DATA_LAYOUT,
97
- LLM_KV_EXPERT_COUNT,
98
- LLM_KV_EXPERT_USED_COUNT,
99
- LLM_KV_EXPERT_SHARED_COUNT,
100
- LLM_KV_EXPERT_WEIGHTS_SCALE,
101
- LLM_KV_EXPERT_WEIGHTS_NORM,
102
- LLM_KV_EXPERT_GATING_FUNC,
103
- LLM_KV_POOLING_TYPE,
104
- LLM_KV_LOGIT_SCALE,
105
- LLM_KV_DECODER_START_TOKEN_ID,
106
- LLM_KV_ATTN_LOGIT_SOFTCAPPING,
107
- LLM_KV_FINAL_LOGIT_SOFTCAPPING,
108
- LLM_KV_SWIN_NORM,
109
- LLM_KV_RESCALE_EVERY_N_LAYERS,
110
- LLM_KV_TIME_MIX_EXTRA_DIM,
111
- LLM_KV_TIME_DECAY_EXTRA_DIM,
112
- LLM_KV_RESIDUAL_SCALE,
113
- LLM_KV_EMBEDDING_SCALE,
114
- LLM_KV_TOKEN_SHIFT_COUNT,
115
-
116
- LLM_KV_ATTENTION_HEAD_COUNT,
117
- LLM_KV_ATTENTION_HEAD_COUNT_KV,
118
- LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
119
- LLM_KV_ATTENTION_CLAMP_KQV,
120
- LLM_KV_ATTENTION_KEY_LENGTH,
121
- LLM_KV_ATTENTION_VALUE_LENGTH,
122
- LLM_KV_ATTENTION_LAYERNORM_EPS,
123
- LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
124
- LLM_KV_ATTENTION_GROUPNORM_EPS,
125
- LLM_KV_ATTENTION_GROUPNORM_GROUPS,
126
- LLM_KV_ATTENTION_CAUSAL,
127
- LLM_KV_ATTENTION_Q_LORA_RANK,
128
- LLM_KV_ATTENTION_KV_LORA_RANK,
129
- LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
130
- LLM_KV_ATTENTION_SLIDING_WINDOW,
131
- LLM_KV_ATTENTION_SCALE,
132
-
133
- LLM_KV_ROPE_DIMENSION_COUNT,
134
- LLM_KV_ROPE_DIMENSION_SECTIONS,
135
- LLM_KV_ROPE_FREQ_BASE,
136
- LLM_KV_ROPE_SCALE_LINEAR,
137
- LLM_KV_ROPE_SCALING_TYPE,
138
- LLM_KV_ROPE_SCALING_FACTOR,
139
- LLM_KV_ROPE_SCALING_ATTN_FACTOR,
140
- LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
141
- LLM_KV_ROPE_SCALING_FINETUNED,
142
- LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
143
-
144
- LLM_KV_SPLIT_NO,
145
- LLM_KV_SPLIT_COUNT,
146
- LLM_KV_SPLIT_TENSORS_COUNT,
147
-
148
- LLM_KV_SSM_INNER_SIZE,
149
- LLM_KV_SSM_CONV_KERNEL,
150
- LLM_KV_SSM_STATE_SIZE,
151
- LLM_KV_SSM_TIME_STEP_RANK,
152
- LLM_KV_SSM_DT_B_C_RMS,
153
-
154
- LLM_KV_WKV_HEAD_SIZE,
155
-
156
- LLM_KV_TOKENIZER_MODEL,
157
- LLM_KV_TOKENIZER_PRE,
158
- LLM_KV_TOKENIZER_LIST,
159
- LLM_KV_TOKENIZER_TOKEN_TYPE,
160
- LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
161
- LLM_KV_TOKENIZER_SCORES,
162
- LLM_KV_TOKENIZER_MERGES,
163
- LLM_KV_TOKENIZER_BOS_ID,
164
- LLM_KV_TOKENIZER_EOS_ID,
165
- LLM_KV_TOKENIZER_EOT_ID,
166
- LLM_KV_TOKENIZER_EOM_ID,
167
- LLM_KV_TOKENIZER_UNK_ID,
168
- LLM_KV_TOKENIZER_SEP_ID,
169
- LLM_KV_TOKENIZER_PAD_ID,
170
- LLM_KV_TOKENIZER_CLS_ID,
171
- LLM_KV_TOKENIZER_MASK_ID,
172
- LLM_KV_TOKENIZER_ADD_BOS,
173
- LLM_KV_TOKENIZER_ADD_EOS,
174
- LLM_KV_TOKENIZER_ADD_PREFIX,
175
- LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
176
- LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
177
- LLM_KV_TOKENIZER_HF_JSON,
178
- LLM_KV_TOKENIZER_RWKV,
179
- LLM_KV_TOKENIZER_CHAT_TEMPLATE,
180
- LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
181
- LLM_KV_TOKENIZER_FIM_PRE_ID,
182
- LLM_KV_TOKENIZER_FIM_SUF_ID,
183
- LLM_KV_TOKENIZER_FIM_MID_ID,
184
- LLM_KV_TOKENIZER_FIM_PAD_ID,
185
- LLM_KV_TOKENIZER_FIM_REP_ID,
186
- LLM_KV_TOKENIZER_FIM_SEP_ID,
187
-
188
- LLM_KV_ADAPTER_TYPE,
189
- LLM_KV_ADAPTER_LORA_ALPHA,
190
-
191
- LLM_KV_POSNET_EMBEDDING_LENGTH,
192
- LLM_KV_POSNET_BLOCK_COUNT,
193
-
194
- LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
195
- LLM_KV_CONVNEXT_BLOCK_COUNT,
196
-
197
- // deprecated:
198
- LLM_KV_TOKENIZER_PREFIX_ID,
199
- LLM_KV_TOKENIZER_SUFFIX_ID,
200
- LLM_KV_TOKENIZER_MIDDLE_ID,
201
- };
202
-
203
- enum llm_tensor {
204
- LLM_TENSOR_TOKEN_EMBD,
205
- LLM_TENSOR_TOKEN_EMBD_NORM,
206
- LLM_TENSOR_TOKEN_TYPES,
207
- LLM_TENSOR_POS_EMBD,
208
- LLM_TENSOR_OUTPUT,
209
- LLM_TENSOR_OUTPUT_NORM,
210
- LLM_TENSOR_ROPE_FREQS,
211
- LLM_TENSOR_ROPE_FACTORS_LONG,
212
- LLM_TENSOR_ROPE_FACTORS_SHORT,
213
- LLM_TENSOR_ATTN_Q,
214
- LLM_TENSOR_ATTN_K,
215
- LLM_TENSOR_ATTN_V,
216
- LLM_TENSOR_ATTN_QKV,
217
- LLM_TENSOR_ATTN_OUT,
218
- LLM_TENSOR_ATTN_NORM,
219
- LLM_TENSOR_ATTN_NORM_2,
220
- LLM_TENSOR_ATTN_OUT_NORM,
221
- LLM_TENSOR_ATTN_POST_NORM,
222
- LLM_TENSOR_ATTN_ROT_EMBD,
223
- LLM_TENSOR_FFN_GATE_INP,
224
- LLM_TENSOR_FFN_GATE_INP_SHEXP,
225
- LLM_TENSOR_FFN_NORM,
226
- LLM_TENSOR_FFN_POST_NORM,
227
- LLM_TENSOR_FFN_GATE,
228
- LLM_TENSOR_FFN_DOWN,
229
- LLM_TENSOR_FFN_UP,
230
- LLM_TENSOR_FFN_ACT,
231
- LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
232
- LLM_TENSOR_FFN_GATE_EXP,
233
- LLM_TENSOR_FFN_UP_EXP,
234
- LLM_TENSOR_FFN_NORM_EXPS,
235
- LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
236
- LLM_TENSOR_FFN_GATE_EXPS,
237
- LLM_TENSOR_FFN_UP_EXPS,
238
- LLM_TENSOR_FFN_DOWN_SHEXP,
239
- LLM_TENSOR_FFN_GATE_SHEXP,
240
- LLM_TENSOR_FFN_UP_SHEXP,
241
- LLM_TENSOR_FFN_EXP_PROBS_B,
242
- LLM_TENSOR_ATTN_Q_NORM,
243
- LLM_TENSOR_ATTN_K_NORM,
244
- LLM_TENSOR_LAYER_OUT_NORM,
245
- LLM_TENSOR_SSM_IN,
246
- LLM_TENSOR_SSM_CONV1D,
247
- LLM_TENSOR_SSM_X,
248
- LLM_TENSOR_SSM_DT,
249
- LLM_TENSOR_SSM_A,
250
- LLM_TENSOR_SSM_D,
251
- LLM_TENSOR_SSM_OUT,
252
- LLM_TENSOR_TIME_MIX_W1,
253
- LLM_TENSOR_TIME_MIX_W2,
254
- LLM_TENSOR_TIME_MIX_LERP_X,
255
- LLM_TENSOR_TIME_MIX_LERP_W,
256
- LLM_TENSOR_TIME_MIX_LERP_K,
257
- LLM_TENSOR_TIME_MIX_LERP_V,
258
- LLM_TENSOR_TIME_MIX_LERP_R,
259
- LLM_TENSOR_TIME_MIX_LERP_G,
260
- LLM_TENSOR_TIME_MIX_LERP_FUSED,
261
- LLM_TENSOR_TIME_MIX_FIRST,
262
- LLM_TENSOR_TIME_MIX_DECAY,
263
- LLM_TENSOR_TIME_MIX_DECAY_W1,
264
- LLM_TENSOR_TIME_MIX_DECAY_W2,
265
- LLM_TENSOR_TIME_MIX_KEY,
266
- LLM_TENSOR_TIME_MIX_VALUE,
267
- LLM_TENSOR_TIME_MIX_RECEPTANCE,
268
- LLM_TENSOR_TIME_MIX_GATE,
269
- LLM_TENSOR_TIME_MIX_LN,
270
- LLM_TENSOR_TIME_MIX_OUTPUT,
271
- LLM_TENSOR_CHANNEL_MIX_LERP_K,
272
- LLM_TENSOR_CHANNEL_MIX_LERP_R,
273
- LLM_TENSOR_CHANNEL_MIX_KEY,
274
- LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
275
- LLM_TENSOR_CHANNEL_MIX_VALUE,
276
- LLM_TENSOR_ATTN_Q_A,
277
- LLM_TENSOR_ATTN_Q_B,
278
- LLM_TENSOR_ATTN_KV_A_MQA,
279
- LLM_TENSOR_ATTN_KV_B,
280
- LLM_TENSOR_ATTN_Q_A_NORM,
281
- LLM_TENSOR_ATTN_KV_A_NORM,
282
- LLM_TENSOR_ATTN_SUB_NORM,
283
- LLM_TENSOR_FFN_SUB_NORM,
284
- LLM_TENSOR_DEC_ATTN_NORM,
285
- LLM_TENSOR_DEC_ATTN_Q,
286
- LLM_TENSOR_DEC_ATTN_K,
287
- LLM_TENSOR_DEC_ATTN_V,
288
- LLM_TENSOR_DEC_ATTN_OUT,
289
- LLM_TENSOR_DEC_ATTN_REL_B,
290
- LLM_TENSOR_DEC_CROSS_ATTN_NORM,
291
- LLM_TENSOR_DEC_CROSS_ATTN_Q,
292
- LLM_TENSOR_DEC_CROSS_ATTN_K,
293
- LLM_TENSOR_DEC_CROSS_ATTN_V,
294
- LLM_TENSOR_DEC_CROSS_ATTN_OUT,
295
- LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
296
- LLM_TENSOR_DEC_FFN_NORM,
297
- LLM_TENSOR_DEC_FFN_GATE,
298
- LLM_TENSOR_DEC_FFN_DOWN,
299
- LLM_TENSOR_DEC_FFN_UP,
300
- LLM_TENSOR_DEC_OUTPUT_NORM,
301
- LLM_TENSOR_ENC_ATTN_NORM,
302
- LLM_TENSOR_ENC_ATTN_Q,
303
- LLM_TENSOR_ENC_ATTN_K,
304
- LLM_TENSOR_ENC_ATTN_V,
305
- LLM_TENSOR_ENC_ATTN_OUT,
306
- LLM_TENSOR_ENC_ATTN_REL_B,
307
- LLM_TENSOR_ENC_FFN_NORM,
308
- LLM_TENSOR_ENC_FFN_GATE,
309
- LLM_TENSOR_ENC_FFN_DOWN,
310
- LLM_TENSOR_ENC_FFN_UP,
311
- LLM_TENSOR_ENC_OUTPUT_NORM,
312
- LLM_TENSOR_CLS,
313
- LLM_TENSOR_CLS_OUT,
314
- LLM_TENSOR_CONV1D,
315
- LLM_TENSOR_CONVNEXT_DW,
316
- LLM_TENSOR_CONVNEXT_NORM,
317
- LLM_TENSOR_CONVNEXT_PW1,
318
- LLM_TENSOR_CONVNEXT_PW2,
319
- LLM_TENSOR_CONVNEXT_GAMMA,
320
- LLM_TENSOR_POS_NET_CONV1,
321
- LLM_TENSOR_POS_NET_CONV2,
322
- LLM_TENSOR_POS_NET_NORM,
323
- LLM_TENSOR_POS_NET_NORM1,
324
- LLM_TENSOR_POS_NET_NORM2,
325
- LLM_TENSOR_POS_NET_ATTN_NORM,
326
- LLM_TENSOR_POS_NET_ATTN_Q,
327
- LLM_TENSOR_POS_NET_ATTN_K,
328
- LLM_TENSOR_POS_NET_ATTN_V,
329
- LLM_TENSOR_POS_NET_ATTN_OUT,
330
- };
331
-
332
- enum llm_tensor_layer {
333
- LLM_TENSOR_LAYER_INPUT,
334
- LLM_TENSOR_LAYER_REPEATING,
335
- LLM_TENSOR_LAYER_OUTPUT,
336
- };
337
-
338
- struct LLM_KV {
339
- LLM_KV(llm_arch arch, const char * suffix = nullptr);
340
-
341
- llm_arch arch;
342
- const char * suffix;
343
-
344
- std::string operator()(llm_kv kv) const;
345
- };
346
-
347
- // helper to handle gguf constants
348
- // usage:
349
- //
350
- // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
351
- //
352
- // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
353
- // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
354
- // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
355
- //
356
- struct LLM_TN_IMPL {
357
- const llm_arch arch;
358
- const llm_tensor tensor;
359
- const char * const suffix;
360
- const int bid;
361
- const int xid;
362
-
363
- std::string str() const;
364
-
365
- operator std::string() const {
366
- return str();
367
- }
368
-
369
- friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
370
- return str == tn.str();
371
- }
372
-
373
- friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
374
- return str != tn.str();
375
- }
376
- };
377
-
378
- struct LLM_TN {
379
- LLM_TN(llm_arch arch) : arch(arch) {}
380
-
381
- llm_arch arch;
382
-
383
- LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
384
- return { arch, tensor, suffix, bid, xid };
385
- }
386
-
387
- LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
388
- return { arch, tensor, nullptr, bid, xid };
389
- }
390
- };
391
-
392
-
393
- struct llm_tensor_info {
394
- llm_tensor_layer layer;
395
- lm_ggml_op op;
396
- };
397
-
398
- const char * llm_arch_name(llm_arch arch);
399
-
400
- llm_arch llm_arch_from_string(const std::string & name);
401
-
402
- const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
1
+ #pragma once
2
+
3
+ #include "ggml.h" // lm_ggml_op
4
+
5
+ #include <string>
6
+
7
+ //
8
+ // gguf constants (sync with gguf.py)
9
+ //
10
+
11
+ enum llm_arch {
12
+ LLM_ARCH_LLAMA,
13
+ LLM_ARCH_LLAMA4,
14
+ LLM_ARCH_DECI,
15
+ LLM_ARCH_FALCON,
16
+ LLM_ARCH_BAICHUAN,
17
+ LLM_ARCH_GROK,
18
+ LLM_ARCH_GPT2,
19
+ LLM_ARCH_GPTJ,
20
+ LLM_ARCH_GPTNEOX,
21
+ LLM_ARCH_MPT,
22
+ LLM_ARCH_STARCODER,
23
+ LLM_ARCH_REFACT,
24
+ LLM_ARCH_BERT,
25
+ LLM_ARCH_NOMIC_BERT,
26
+ LLM_ARCH_JINA_BERT_V2,
27
+ LLM_ARCH_BLOOM,
28
+ LLM_ARCH_STABLELM,
29
+ LLM_ARCH_QWEN,
30
+ LLM_ARCH_QWEN2,
31
+ LLM_ARCH_QWEN2MOE,
32
+ LLM_ARCH_QWEN2VL,
33
+ LLM_ARCH_QWEN3,
34
+ LLM_ARCH_QWEN3MOE,
35
+ LLM_ARCH_PHI2,
36
+ LLM_ARCH_PHI3,
37
+ LLM_ARCH_PHIMOE,
38
+ LLM_ARCH_PLAMO,
39
+ LLM_ARCH_CODESHELL,
40
+ LLM_ARCH_ORION,
41
+ LLM_ARCH_INTERNLM2,
42
+ LLM_ARCH_MINICPM,
43
+ LLM_ARCH_MINICPM3,
44
+ LLM_ARCH_GEMMA,
45
+ LLM_ARCH_GEMMA2,
46
+ LLM_ARCH_GEMMA3,
47
+ LLM_ARCH_STARCODER2,
48
+ LLM_ARCH_MAMBA,
49
+ LLM_ARCH_XVERSE,
50
+ LLM_ARCH_COMMAND_R,
51
+ LLM_ARCH_COHERE2,
52
+ LLM_ARCH_DBRX,
53
+ LLM_ARCH_OLMO,
54
+ LLM_ARCH_OLMO2,
55
+ LLM_ARCH_OLMOE,
56
+ LLM_ARCH_OPENELM,
57
+ LLM_ARCH_ARCTIC,
58
+ LLM_ARCH_DEEPSEEK,
59
+ LLM_ARCH_DEEPSEEK2,
60
+ LLM_ARCH_CHATGLM,
61
+ LLM_ARCH_BITNET,
62
+ LLM_ARCH_T5,
63
+ LLM_ARCH_T5ENCODER,
64
+ LLM_ARCH_JAIS,
65
+ LLM_ARCH_NEMOTRON,
66
+ LLM_ARCH_EXAONE,
67
+ LLM_ARCH_RWKV6,
68
+ LLM_ARCH_RWKV6QWEN2,
69
+ LLM_ARCH_RWKV7,
70
+ LLM_ARCH_ARWKV7,
71
+ LLM_ARCH_GRANITE,
72
+ LLM_ARCH_GRANITE_MOE,
73
+ LLM_ARCH_CHAMELEON,
74
+ LLM_ARCH_WAVTOKENIZER_DEC,
75
+ LLM_ARCH_PLM,
76
+ LLM_ARCH_BAILINGMOE,
77
+ LLM_ARCH_UNKNOWN,
78
+ };
79
+
80
+ enum llm_kv {
81
+ LLM_KV_GENERAL_TYPE,
82
+ LLM_KV_GENERAL_ARCHITECTURE,
83
+ LLM_KV_GENERAL_QUANTIZATION_VERSION,
84
+ LLM_KV_GENERAL_ALIGNMENT,
85
+ LLM_KV_GENERAL_FILE_TYPE,
86
+ LLM_KV_GENERAL_NAME,
87
+ LLM_KV_GENERAL_AUTHOR,
88
+ LLM_KV_GENERAL_VERSION,
89
+ LLM_KV_GENERAL_URL,
90
+ LLM_KV_GENERAL_DESCRIPTION,
91
+ LLM_KV_GENERAL_LICENSE,
92
+ LLM_KV_GENERAL_SOURCE_URL,
93
+ LLM_KV_GENERAL_SOURCE_HF_REPO,
94
+
95
+ LLM_KV_VOCAB_SIZE,
96
+ LLM_KV_CONTEXT_LENGTH,
97
+ LLM_KV_EMBEDDING_LENGTH,
98
+ LLM_KV_FEATURES_LENGTH,
99
+ LLM_KV_BLOCK_COUNT,
100
+ LLM_KV_LEADING_DENSE_BLOCK_COUNT,
101
+ LLM_KV_FEED_FORWARD_LENGTH,
102
+ LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
103
+ LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
104
+ LLM_KV_USE_PARALLEL_RESIDUAL,
105
+ LLM_KV_TENSOR_DATA_LAYOUT,
106
+ LLM_KV_EXPERT_COUNT,
107
+ LLM_KV_EXPERT_USED_COUNT,
108
+ LLM_KV_EXPERT_SHARED_COUNT,
109
+ LLM_KV_EXPERT_WEIGHTS_SCALE,
110
+ LLM_KV_EXPERT_WEIGHTS_NORM,
111
+ LLM_KV_EXPERT_GATING_FUNC,
112
+ LLM_KV_POOLING_TYPE,
113
+ LLM_KV_LOGIT_SCALE,
114
+ LLM_KV_DECODER_START_TOKEN_ID,
115
+ LLM_KV_ATTN_LOGIT_SOFTCAPPING,
116
+ LLM_KV_FINAL_LOGIT_SOFTCAPPING,
117
+ LLM_KV_SWIN_NORM,
118
+ LLM_KV_RESCALE_EVERY_N_LAYERS,
119
+ LLM_KV_TIME_MIX_EXTRA_DIM,
120
+ LLM_KV_TIME_DECAY_EXTRA_DIM,
121
+ LLM_KV_RESIDUAL_SCALE,
122
+ LLM_KV_EMBEDDING_SCALE,
123
+ LLM_KV_TOKEN_SHIFT_COUNT,
124
+ LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
125
+
126
+ LLM_KV_ATTENTION_HEAD_COUNT,
127
+ LLM_KV_ATTENTION_HEAD_COUNT_KV,
128
+ LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
129
+ LLM_KV_ATTENTION_CLAMP_KQV,
130
+ LLM_KV_ATTENTION_KEY_LENGTH,
131
+ LLM_KV_ATTENTION_VALUE_LENGTH,
132
+ LLM_KV_ATTENTION_LAYERNORM_EPS,
133
+ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
134
+ LLM_KV_ATTENTION_GROUPNORM_EPS,
135
+ LLM_KV_ATTENTION_GROUPNORM_GROUPS,
136
+ LLM_KV_ATTENTION_CAUSAL,
137
+ LLM_KV_ATTENTION_Q_LORA_RANK,
138
+ LLM_KV_ATTENTION_KV_LORA_RANK,
139
+ LLM_KV_ATTENTION_DECAY_LORA_RANK,
140
+ LLM_KV_ATTENTION_ICLR_LORA_RANK,
141
+ LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
142
+ LLM_KV_ATTENTION_GATE_LORA_RANK,
143
+ LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
144
+ LLM_KV_ATTENTION_SLIDING_WINDOW,
145
+ LLM_KV_ATTENTION_SCALE,
146
+
147
+ LLM_KV_ROPE_DIMENSION_COUNT,
148
+ LLM_KV_ROPE_DIMENSION_SECTIONS,
149
+ LLM_KV_ROPE_FREQ_BASE,
150
+ LLM_KV_ROPE_SCALE_LINEAR,
151
+ LLM_KV_ROPE_SCALING_TYPE,
152
+ LLM_KV_ROPE_SCALING_FACTOR,
153
+ LLM_KV_ROPE_SCALING_ATTN_FACTOR,
154
+ LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
155
+ LLM_KV_ROPE_SCALING_FINETUNED,
156
+ LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
157
+
158
+ LLM_KV_SPLIT_NO,
159
+ LLM_KV_SPLIT_COUNT,
160
+ LLM_KV_SPLIT_TENSORS_COUNT,
161
+
162
+ LLM_KV_SSM_INNER_SIZE,
163
+ LLM_KV_SSM_CONV_KERNEL,
164
+ LLM_KV_SSM_STATE_SIZE,
165
+ LLM_KV_SSM_TIME_STEP_RANK,
166
+ LLM_KV_SSM_DT_B_C_RMS,
167
+
168
+ LLM_KV_WKV_HEAD_SIZE,
169
+
170
+ LLM_KV_TOKENIZER_MODEL,
171
+ LLM_KV_TOKENIZER_PRE,
172
+ LLM_KV_TOKENIZER_LIST,
173
+ LLM_KV_TOKENIZER_TOKEN_TYPE,
174
+ LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
175
+ LLM_KV_TOKENIZER_SCORES,
176
+ LLM_KV_TOKENIZER_MERGES,
177
+ LLM_KV_TOKENIZER_BOS_ID,
178
+ LLM_KV_TOKENIZER_EOS_ID,
179
+ LLM_KV_TOKENIZER_EOT_ID,
180
+ LLM_KV_TOKENIZER_EOM_ID,
181
+ LLM_KV_TOKENIZER_UNK_ID,
182
+ LLM_KV_TOKENIZER_SEP_ID,
183
+ LLM_KV_TOKENIZER_PAD_ID,
184
+ LLM_KV_TOKENIZER_CLS_ID,
185
+ LLM_KV_TOKENIZER_MASK_ID,
186
+ LLM_KV_TOKENIZER_ADD_BOS,
187
+ LLM_KV_TOKENIZER_ADD_EOS,
188
+ LLM_KV_TOKENIZER_ADD_PREFIX,
189
+ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
190
+ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
191
+ LLM_KV_TOKENIZER_HF_JSON,
192
+ LLM_KV_TOKENIZER_RWKV,
193
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE,
194
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
195
+ LLM_KV_TOKENIZER_FIM_PRE_ID,
196
+ LLM_KV_TOKENIZER_FIM_SUF_ID,
197
+ LLM_KV_TOKENIZER_FIM_MID_ID,
198
+ LLM_KV_TOKENIZER_FIM_PAD_ID,
199
+ LLM_KV_TOKENIZER_FIM_REP_ID,
200
+ LLM_KV_TOKENIZER_FIM_SEP_ID,
201
+
202
+ LLM_KV_ADAPTER_TYPE,
203
+ LLM_KV_ADAPTER_LORA_ALPHA,
204
+
205
+ LLM_KV_POSNET_EMBEDDING_LENGTH,
206
+ LLM_KV_POSNET_BLOCK_COUNT,
207
+
208
+ LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
209
+ LLM_KV_CONVNEXT_BLOCK_COUNT,
210
+
211
+ // deprecated:
212
+ LLM_KV_TOKENIZER_PREFIX_ID,
213
+ LLM_KV_TOKENIZER_SUFFIX_ID,
214
+ LLM_KV_TOKENIZER_MIDDLE_ID,
215
+ };
216
+
217
// Identifiers for every tensor a model architecture may contain.
// NOTE: enumerators carry implicit values, so the declaration order is a
// stable contract — new entries must be appended within their section and
// existing ones must never be reordered.
enum llm_tensor {
    // embeddings / output head
    LLM_TENSOR_TOKEN_EMBD,
    LLM_TENSOR_TOKEN_EMBD_NORM,
    LLM_TENSOR_TOKEN_TYPES,
    LLM_TENSOR_POS_EMBD,
    LLM_TENSOR_OUTPUT,
    LLM_TENSOR_OUTPUT_NORM,
    LLM_TENSOR_ROPE_FREQS,
    LLM_TENSOR_ROPE_FACTORS_LONG,
    LLM_TENSOR_ROPE_FACTORS_SHORT,
    // self-attention
    LLM_TENSOR_ATTN_Q,
    LLM_TENSOR_ATTN_K,
    LLM_TENSOR_ATTN_V,
    LLM_TENSOR_ATTN_QKV,
    LLM_TENSOR_ATTN_OUT,
    LLM_TENSOR_ATTN_NORM,
    LLM_TENSOR_ATTN_NORM_2,
    LLM_TENSOR_ATTN_OUT_NORM,
    LLM_TENSOR_ATTN_POST_NORM,
    LLM_TENSOR_ATTN_ROT_EMBD,
    // feed-forward network (dense and mixture-of-experts)
    LLM_TENSOR_FFN_GATE_INP,
    LLM_TENSOR_FFN_GATE_INP_SHEXP,
    LLM_TENSOR_FFN_NORM,
    LLM_TENSOR_FFN_POST_NORM,
    LLM_TENSOR_FFN_GATE,
    LLM_TENSOR_FFN_DOWN,
    LLM_TENSOR_FFN_UP,
    LLM_TENSOR_FFN_ACT,
    LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
    LLM_TENSOR_FFN_GATE_EXP,
    LLM_TENSOR_FFN_UP_EXP,
    LLM_TENSOR_FFN_NORM_EXPS,
    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
    LLM_TENSOR_FFN_GATE_EXPS,
    LLM_TENSOR_FFN_UP_EXPS,
    LLM_TENSOR_FFN_DOWN_SHEXP,
    LLM_TENSOR_FFN_GATE_SHEXP,
    LLM_TENSOR_FFN_UP_SHEXP,
    LLM_TENSOR_FFN_EXP_PROBS_B,
    LLM_TENSOR_ATTN_Q_NORM,
    LLM_TENSOR_ATTN_K_NORM,
    LLM_TENSOR_LAYER_OUT_NORM,
    // state-space models (Mamba-style)
    LLM_TENSOR_SSM_IN,
    LLM_TENSOR_SSM_CONV1D,
    LLM_TENSOR_SSM_X,
    LLM_TENSOR_SSM_DT,
    LLM_TENSOR_SSM_A,
    LLM_TENSOR_SSM_D,
    LLM_TENSOR_SSM_OUT,
    // RWKV time-mix / channel-mix
    LLM_TENSOR_TIME_MIX_W0,
    LLM_TENSOR_TIME_MIX_W1,
    LLM_TENSOR_TIME_MIX_W2,
    LLM_TENSOR_TIME_MIX_A0,
    LLM_TENSOR_TIME_MIX_A1,
    LLM_TENSOR_TIME_MIX_A2,
    LLM_TENSOR_TIME_MIX_V0,
    LLM_TENSOR_TIME_MIX_V1,
    LLM_TENSOR_TIME_MIX_V2,
    LLM_TENSOR_TIME_MIX_G1,
    LLM_TENSOR_TIME_MIX_G2,
    LLM_TENSOR_TIME_MIX_K_K,
    LLM_TENSOR_TIME_MIX_K_A,
    LLM_TENSOR_TIME_MIX_R_K,
    LLM_TENSOR_TIME_MIX_LERP_X,
    LLM_TENSOR_TIME_MIX_LERP_W,
    LLM_TENSOR_TIME_MIX_LERP_K,
    LLM_TENSOR_TIME_MIX_LERP_V,
    LLM_TENSOR_TIME_MIX_LERP_R,
    LLM_TENSOR_TIME_MIX_LERP_G,
    LLM_TENSOR_TIME_MIX_LERP_FUSED,
    LLM_TENSOR_TIME_MIX_FIRST,
    LLM_TENSOR_TIME_MIX_DECAY,
    LLM_TENSOR_TIME_MIX_DECAY_W1,
    LLM_TENSOR_TIME_MIX_DECAY_W2,
    LLM_TENSOR_TIME_MIX_KEY,
    LLM_TENSOR_TIME_MIX_VALUE,
    LLM_TENSOR_TIME_MIX_RECEPTANCE,
    LLM_TENSOR_TIME_MIX_GATE,
    LLM_TENSOR_TIME_MIX_LN,
    LLM_TENSOR_TIME_MIX_OUTPUT,
    LLM_TENSOR_CHANNEL_MIX_LERP_K,
    LLM_TENSOR_CHANNEL_MIX_LERP_R,
    LLM_TENSOR_CHANNEL_MIX_KEY,
    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
    LLM_TENSOR_CHANNEL_MIX_VALUE,
    // multi-head latent attention (DeepSeek-style low-rank projections)
    LLM_TENSOR_ATTN_Q_A,
    LLM_TENSOR_ATTN_Q_B,
    LLM_TENSOR_ATTN_KV_A_MQA,
    LLM_TENSOR_ATTN_KV_B,
    LLM_TENSOR_ATTN_Q_A_NORM,
    LLM_TENSOR_ATTN_KV_A_NORM,
    LLM_TENSOR_ATTN_SUB_NORM,
    LLM_TENSOR_FFN_SUB_NORM,
    // encoder-decoder (T5-style) decoder side
    LLM_TENSOR_DEC_ATTN_NORM,
    LLM_TENSOR_DEC_ATTN_Q,
    LLM_TENSOR_DEC_ATTN_K,
    LLM_TENSOR_DEC_ATTN_V,
    LLM_TENSOR_DEC_ATTN_OUT,
    LLM_TENSOR_DEC_ATTN_REL_B,
    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
    LLM_TENSOR_DEC_CROSS_ATTN_Q,
    LLM_TENSOR_DEC_CROSS_ATTN_K,
    LLM_TENSOR_DEC_CROSS_ATTN_V,
    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
    LLM_TENSOR_DEC_FFN_NORM,
    LLM_TENSOR_DEC_FFN_GATE,
    LLM_TENSOR_DEC_FFN_DOWN,
    LLM_TENSOR_DEC_FFN_UP,
    LLM_TENSOR_DEC_OUTPUT_NORM,
    // encoder-decoder (T5-style) encoder side
    LLM_TENSOR_ENC_ATTN_NORM,
    LLM_TENSOR_ENC_ATTN_Q,
    LLM_TENSOR_ENC_ATTN_K,
    LLM_TENSOR_ENC_ATTN_V,
    LLM_TENSOR_ENC_ATTN_OUT,
    LLM_TENSOR_ENC_ATTN_REL_B,
    LLM_TENSOR_ENC_FFN_NORM,
    LLM_TENSOR_ENC_FFN_GATE,
    LLM_TENSOR_ENC_FFN_DOWN,
    LLM_TENSOR_ENC_FFN_UP,
    LLM_TENSOR_ENC_OUTPUT_NORM,
    // classification heads
    LLM_TENSOR_CLS,
    LLM_TENSOR_CLS_OUT,
    // convolutional front-ends (ConvNeXt / PosNet, e.g. audio models)
    LLM_TENSOR_CONV1D,
    LLM_TENSOR_CONVNEXT_DW,
    LLM_TENSOR_CONVNEXT_NORM,
    LLM_TENSOR_CONVNEXT_PW1,
    LLM_TENSOR_CONVNEXT_PW2,
    LLM_TENSOR_CONVNEXT_GAMMA,
    LLM_TENSOR_POS_NET_CONV1,
    LLM_TENSOR_POS_NET_CONV2,
    LLM_TENSOR_POS_NET_NORM,
    LLM_TENSOR_POS_NET_NORM1,
    LLM_TENSOR_POS_NET_NORM2,
    LLM_TENSOR_POS_NET_ATTN_NORM,
    LLM_TENSOR_POS_NET_ATTN_Q,
    LLM_TENSOR_POS_NET_ATTN_K,
    LLM_TENSOR_POS_NET_ATTN_V,
    LLM_TENSOR_POS_NET_ATTN_OUT,
};
357
+
358
// Which part of the model a tensor belongs to; used to decide buffer
// placement (input embeddings vs. per-layer weights vs. output head).
enum llm_tensor_layer {
    LLM_TENSOR_LAYER_INPUT,     // pre-repeating-block tensors (e.g. token embeddings)
    LLM_TENSOR_LAYER_REPEATING, // tensors repeated once per transformer layer
    LLM_TENSOR_LAYER_OUTPUT,    // post-repeating-block tensors (e.g. output head)
};
363
+
364
+ struct LLM_KV {
365
+ LLM_KV(llm_arch arch, const char * suffix = nullptr);
366
+
367
+ llm_arch arch;
368
+ const char * suffix;
369
+
370
+ std::string operator()(llm_kv kv) const;
371
+ };
372
+
373
+ // helper to handle gguf constants
374
+ // usage:
375
+ //
376
+ // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
377
+ //
378
+ // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
379
+ // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
380
+ // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
381
+ //
382
+ struct LLM_TN_IMPL {
383
+ const llm_arch arch;
384
+ const llm_tensor tensor;
385
+ const char * const suffix;
386
+ const int bid;
387
+ const int xid;
388
+
389
+ std::string str() const;
390
+
391
+ operator std::string() const {
392
+ return str();
393
+ }
394
+
395
+ friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
396
+ return str == tn.str();
397
+ }
398
+
399
+ friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
400
+ return str != tn.str();
401
+ }
402
+ };
403
+
404
+ struct LLM_TN {
405
+ LLM_TN(llm_arch arch) : arch(arch) {}
406
+
407
+ llm_arch arch;
408
+
409
+ LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
410
+ return { arch, tensor, suffix, bid, xid };
411
+ }
412
+
413
+ LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
414
+ return { arch, tensor, nullptr, bid, xid };
415
+ }
416
+ };
417
+
418
+
419
+ struct llm_tensor_info {
420
+ llm_tensor_layer layer;
421
+ lm_ggml_op op;
422
+ };
423
+
424
+ const char * llm_arch_name(llm_arch arch);
425
+
426
+ llm_arch llm_arch_from_string(const std::string & name);
427
+
428
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);