@fugood/llama.node 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. package/README.md +17 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +3 -1
  19. package/lib/index.js +16 -1
  20. package/lib/index.ts +16 -0
  21. package/package.json +1 -1
  22. package/src/EmbeddingWorker.cpp +4 -3
  23. package/src/LlamaCompletionWorker.cpp +4 -2
  24. package/src/LlamaContext.cpp +61 -6
  25. package/src/LlamaContext.h +1 -0
  26. package/src/common.hpp +6 -11
  27. package/src/llama.cpp/.github/workflows/build.yml +19 -17
  28. package/src/llama.cpp/.github/workflows/docker.yml +77 -30
  29. package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +22 -3
  31. package/src/llama.cpp/CMakeLists.txt +49 -24
  32. package/src/llama.cpp/common/arg.cpp +82 -26
  33. package/src/llama.cpp/common/arg.h +3 -0
  34. package/src/llama.cpp/common/common.cpp +192 -72
  35. package/src/llama.cpp/common/common.h +51 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +12 -12
  37. package/src/llama.cpp/common/ngram-cache.h +2 -2
  38. package/src/llama.cpp/common/sampling.cpp +11 -6
  39. package/src/llama.cpp/common/speculative.cpp +18 -15
  40. package/src/llama.cpp/docs/build.md +2 -0
  41. package/src/llama.cpp/examples/batched/batched.cpp +9 -7
  42. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
  43. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
  44. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
  45. package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
  46. package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
  47. package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
  48. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
  50. package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
  51. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
  52. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
  53. package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
  54. package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
  55. package/src/llama.cpp/examples/infill/infill.cpp +23 -24
  56. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
  57. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
  58. package/src/llama.cpp/examples/llava/clip.cpp +4 -2
  59. package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
  60. package/src/llama.cpp/examples/llava/llava.cpp +2 -2
  61. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
  62. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
  63. package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
  64. package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
  65. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
  66. package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
  67. package/src/llama.cpp/examples/main/main.cpp +51 -29
  68. package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
  69. package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
  70. package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
  71. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
  72. package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
  73. package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
  74. package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
  76. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
  77. package/src/llama.cpp/examples/run/run.cpp +175 -61
  78. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
  79. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
  80. package/src/llama.cpp/examples/server/httplib.h +1295 -409
  81. package/src/llama.cpp/examples/server/server.cpp +387 -181
  82. package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
  83. package/src/llama.cpp/examples/server/utils.hpp +170 -58
  84. package/src/llama.cpp/examples/simple/simple.cpp +9 -8
  85. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
  86. package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
  87. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
  88. package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
  89. package/src/llama.cpp/examples/tts/tts.cpp +64 -23
  90. package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
  91. package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
  92. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
  93. package/src/llama.cpp/ggml/include/ggml.h +36 -145
  94. package/src/llama.cpp/ggml/include/gguf.h +202 -0
  95. package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
  96. package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
  97. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
  98. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
  99. package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
  100. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
  101. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
  102. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
  103. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
  105. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
  106. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
  107. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
  109. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
  110. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
  111. package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
  112. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
  113. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
  115. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  116. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
  117. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  119. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
  120. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
  121. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
  122. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
  123. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
  124. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
  125. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
  126. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  128. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
  129. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
  130. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
  131. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
  132. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
  133. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
  134. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
  135. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
  136. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
  137. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
  138. package/src/llama.cpp/ggml/src/ggml.c +117 -1327
  139. package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
  140. package/src/llama.cpp/include/llama-cpp.h +6 -1
  141. package/src/llama.cpp/include/llama.h +138 -75
  142. package/src/llama.cpp/src/CMakeLists.txt +13 -1
  143. package/src/llama.cpp/src/llama-adapter.cpp +347 -0
  144. package/src/llama.cpp/src/llama-adapter.h +74 -0
  145. package/src/llama.cpp/src/llama-arch.cpp +1487 -0
  146. package/src/llama.cpp/src/llama-arch.h +400 -0
  147. package/src/llama.cpp/src/llama-batch.cpp +368 -0
  148. package/src/llama.cpp/src/llama-batch.h +88 -0
  149. package/src/llama.cpp/src/llama-chat.cpp +578 -0
  150. package/src/llama.cpp/src/llama-chat.h +52 -0
  151. package/src/llama.cpp/src/llama-context.cpp +1775 -0
  152. package/src/llama.cpp/src/llama-context.h +128 -0
  153. package/src/llama.cpp/src/llama-cparams.cpp +1 -0
  154. package/src/llama.cpp/src/llama-cparams.h +37 -0
  155. package/src/llama.cpp/src/llama-grammar.cpp +5 -4
  156. package/src/llama.cpp/src/llama-grammar.h +3 -1
  157. package/src/llama.cpp/src/llama-hparams.cpp +71 -0
  158. package/src/llama.cpp/src/llama-hparams.h +139 -0
  159. package/src/llama.cpp/src/llama-impl.cpp +167 -0
  160. package/src/llama.cpp/src/llama-impl.h +16 -136
  161. package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
  162. package/src/llama.cpp/src/llama-kv-cache.h +218 -0
  163. package/src/llama.cpp/src/llama-mmap.cpp +589 -0
  164. package/src/llama.cpp/src/llama-mmap.h +67 -0
  165. package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
  166. package/src/llama.cpp/src/llama-model-loader.h +167 -0
  167. package/src/llama.cpp/src/llama-model.cpp +3953 -0
  168. package/src/llama.cpp/src/llama-model.h +370 -0
  169. package/src/llama.cpp/src/llama-quant.cpp +934 -0
  170. package/src/llama.cpp/src/llama-quant.h +1 -0
  171. package/src/llama.cpp/src/llama-sampling.cpp +147 -32
  172. package/src/llama.cpp/src/llama-sampling.h +3 -19
  173. package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
  174. package/src/llama.cpp/src/llama-vocab.h +97 -142
  175. package/src/llama.cpp/src/llama.cpp +7160 -20314
  176. package/src/llama.cpp/src/unicode.cpp +8 -3
  177. package/src/llama.cpp/tests/CMakeLists.txt +2 -0
  178. package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
  179. package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
  180. package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
  181. package/src/llama.cpp/tests/test-gguf.cpp +222 -187
  182. package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
  183. package/src/llama.cpp/tests/test-sampling.cpp +0 -1
  184. package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
  185. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
  186. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
@@ -0,0 +1,400 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h" // ggml_op
4
+
5
+ #include <string>
6
+
7
+ //
8
+ // gguf constants (sync with gguf.py)
9
+ //
10
+
11
+ enum llm_arch { // one identifier per model architecture; no explicit values, so numbering follows declaration order from 0
12
+ LLM_ARCH_LLAMA,
13
+ LLM_ARCH_DECI,
14
+ LLM_ARCH_FALCON,
15
+ LLM_ARCH_BAICHUAN,
16
+ LLM_ARCH_GROK,
17
+ LLM_ARCH_GPT2,
18
+ LLM_ARCH_GPTJ,
19
+ LLM_ARCH_GPTNEOX,
20
+ LLM_ARCH_MPT,
21
+ LLM_ARCH_STARCODER,
22
+ LLM_ARCH_REFACT,
23
+ LLM_ARCH_BERT,
24
+ LLM_ARCH_NOMIC_BERT,
25
+ LLM_ARCH_JINA_BERT_V2,
26
+ LLM_ARCH_BLOOM,
27
+ LLM_ARCH_STABLELM,
28
+ LLM_ARCH_QWEN,
29
+ LLM_ARCH_QWEN2,
30
+ LLM_ARCH_QWEN2MOE,
31
+ LLM_ARCH_QWEN2VL,
32
+ LLM_ARCH_PHI2,
33
+ LLM_ARCH_PHI3,
34
+ LLM_ARCH_PHIMOE,
35
+ LLM_ARCH_PLAMO,
36
+ LLM_ARCH_CODESHELL,
37
+ LLM_ARCH_ORION,
38
+ LLM_ARCH_INTERNLM2,
39
+ LLM_ARCH_MINICPM,
40
+ LLM_ARCH_MINICPM3,
41
+ LLM_ARCH_GEMMA,
42
+ LLM_ARCH_GEMMA2,
43
+ LLM_ARCH_STARCODER2,
44
+ LLM_ARCH_MAMBA,
45
+ LLM_ARCH_XVERSE,
46
+ LLM_ARCH_COMMAND_R,
47
+ LLM_ARCH_COHERE2,
48
+ LLM_ARCH_DBRX,
49
+ LLM_ARCH_OLMO,
50
+ LLM_ARCH_OLMO2,
51
+ LLM_ARCH_OLMOE,
52
+ LLM_ARCH_OPENELM,
53
+ LLM_ARCH_ARCTIC,
54
+ LLM_ARCH_DEEPSEEK,
55
+ LLM_ARCH_DEEPSEEK2,
56
+ LLM_ARCH_CHATGLM,
57
+ LLM_ARCH_BITNET,
58
+ LLM_ARCH_T5,
59
+ LLM_ARCH_T5ENCODER,
60
+ LLM_ARCH_JAIS,
61
+ LLM_ARCH_NEMOTRON,
62
+ LLM_ARCH_EXAONE,
63
+ LLM_ARCH_RWKV6,
64
+ LLM_ARCH_RWKV6QWEN2,
65
+ LLM_ARCH_GRANITE,
66
+ LLM_ARCH_GRANITE_MOE,
67
+ LLM_ARCH_CHAMELEON,
68
+ LLM_ARCH_WAVTOKENIZER_DEC,
69
+ LLM_ARCH_UNKNOWN, // declared last; NOTE(review): presumably the result for unrecognized names in llm_arch_from_string - confirm in the implementation
70
+ };
71
+
72
+ enum llm_kv { // identifiers for metadata keys; LLM_KV::operator() turns one into its std::string form
73
+ LLM_KV_GENERAL_TYPE,
74
+ LLM_KV_GENERAL_ARCHITECTURE,
75
+ LLM_KV_GENERAL_QUANTIZATION_VERSION,
76
+ LLM_KV_GENERAL_ALIGNMENT,
77
+ LLM_KV_GENERAL_NAME,
78
+ LLM_KV_GENERAL_AUTHOR,
79
+ LLM_KV_GENERAL_VERSION,
80
+ LLM_KV_GENERAL_URL,
81
+ LLM_KV_GENERAL_DESCRIPTION,
82
+ LLM_KV_GENERAL_LICENSE,
83
+ LLM_KV_GENERAL_SOURCE_URL,
84
+ LLM_KV_GENERAL_SOURCE_HF_REPO,
85
+
86
+ LLM_KV_VOCAB_SIZE,
87
+ LLM_KV_CONTEXT_LENGTH,
88
+ LLM_KV_EMBEDDING_LENGTH,
89
+ LLM_KV_FEATURES_LENGTH,
90
+ LLM_KV_BLOCK_COUNT,
91
+ LLM_KV_LEADING_DENSE_BLOCK_COUNT,
92
+ LLM_KV_FEED_FORWARD_LENGTH,
93
+ LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
94
+ LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
95
+ LLM_KV_USE_PARALLEL_RESIDUAL,
96
+ LLM_KV_TENSOR_DATA_LAYOUT,
97
+ LLM_KV_EXPERT_COUNT,
98
+ LLM_KV_EXPERT_USED_COUNT,
99
+ LLM_KV_EXPERT_SHARED_COUNT,
100
+ LLM_KV_EXPERT_WEIGHTS_SCALE,
101
+ LLM_KV_EXPERT_WEIGHTS_NORM,
102
+ LLM_KV_EXPERT_GATING_FUNC,
103
+ LLM_KV_POOLING_TYPE,
104
+ LLM_KV_LOGIT_SCALE,
105
+ LLM_KV_DECODER_START_TOKEN_ID,
106
+ LLM_KV_ATTN_LOGIT_SOFTCAPPING,
107
+ LLM_KV_FINAL_LOGIT_SOFTCAPPING,
108
+ LLM_KV_SWIN_NORM,
109
+ LLM_KV_RESCALE_EVERY_N_LAYERS,
110
+ LLM_KV_TIME_MIX_EXTRA_DIM,
111
+ LLM_KV_TIME_DECAY_EXTRA_DIM,
112
+ LLM_KV_RESIDUAL_SCALE,
113
+ LLM_KV_EMBEDDING_SCALE,
114
+ LLM_KV_TOKEN_SHIFT_COUNT,
115
+
116
+ LLM_KV_ATTENTION_HEAD_COUNT,
117
+ LLM_KV_ATTENTION_HEAD_COUNT_KV,
118
+ LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
119
+ LLM_KV_ATTENTION_CLAMP_KQV,
120
+ LLM_KV_ATTENTION_KEY_LENGTH,
121
+ LLM_KV_ATTENTION_VALUE_LENGTH,
122
+ LLM_KV_ATTENTION_LAYERNORM_EPS,
123
+ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
124
+ LLM_KV_ATTENTION_GROUPNORM_EPS,
125
+ LLM_KV_ATTENTION_GROUPNORM_GROUPS,
126
+ LLM_KV_ATTENTION_CAUSAL,
127
+ LLM_KV_ATTENTION_Q_LORA_RANK,
128
+ LLM_KV_ATTENTION_KV_LORA_RANK,
129
+ LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
130
+ LLM_KV_ATTENTION_SLIDING_WINDOW,
131
+ LLM_KV_ATTENTION_SCALE,
132
+
133
+ LLM_KV_ROPE_DIMENSION_COUNT,
134
+ LLM_KV_ROPE_DIMENSION_SECTIONS,
135
+ LLM_KV_ROPE_FREQ_BASE,
136
+ LLM_KV_ROPE_SCALE_LINEAR,
137
+ LLM_KV_ROPE_SCALING_TYPE,
138
+ LLM_KV_ROPE_SCALING_FACTOR,
139
+ LLM_KV_ROPE_SCALING_ATTN_FACTOR,
140
+ LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
141
+ LLM_KV_ROPE_SCALING_FINETUNED,
142
+ LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
143
+
144
+ LLM_KV_SPLIT_NO,
145
+ LLM_KV_SPLIT_COUNT,
146
+ LLM_KV_SPLIT_TENSORS_COUNT,
147
+
148
+ LLM_KV_SSM_INNER_SIZE,
149
+ LLM_KV_SSM_CONV_KERNEL,
150
+ LLM_KV_SSM_STATE_SIZE,
151
+ LLM_KV_SSM_TIME_STEP_RANK,
152
+ LLM_KV_SSM_DT_B_C_RMS,
153
+
154
+ LLM_KV_WKV_HEAD_SIZE,
155
+
156
+ LLM_KV_TOKENIZER_MODEL,
157
+ LLM_KV_TOKENIZER_PRE,
158
+ LLM_KV_TOKENIZER_LIST,
159
+ LLM_KV_TOKENIZER_TOKEN_TYPE,
160
+ LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
161
+ LLM_KV_TOKENIZER_SCORES,
162
+ LLM_KV_TOKENIZER_MERGES,
163
+ LLM_KV_TOKENIZER_BOS_ID,
164
+ LLM_KV_TOKENIZER_EOS_ID,
165
+ LLM_KV_TOKENIZER_EOT_ID,
166
+ LLM_KV_TOKENIZER_EOM_ID,
167
+ LLM_KV_TOKENIZER_UNK_ID,
168
+ LLM_KV_TOKENIZER_SEP_ID,
169
+ LLM_KV_TOKENIZER_PAD_ID,
170
+ LLM_KV_TOKENIZER_CLS_ID,
171
+ LLM_KV_TOKENIZER_MASK_ID,
172
+ LLM_KV_TOKENIZER_ADD_BOS,
173
+ LLM_KV_TOKENIZER_ADD_EOS,
174
+ LLM_KV_TOKENIZER_ADD_PREFIX,
175
+ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
176
+ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
177
+ LLM_KV_TOKENIZER_HF_JSON,
178
+ LLM_KV_TOKENIZER_RWKV,
179
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE,
180
+ LLM_KV_TOKENIZER_FIM_PRE_ID,
181
+ LLM_KV_TOKENIZER_FIM_SUF_ID,
182
+ LLM_KV_TOKENIZER_FIM_MID_ID,
183
+ LLM_KV_TOKENIZER_FIM_PAD_ID,
184
+ LLM_KV_TOKENIZER_FIM_REP_ID,
185
+ LLM_KV_TOKENIZER_FIM_SEP_ID,
186
+
187
+ LLM_KV_ADAPTER_TYPE,
188
+ LLM_KV_ADAPTER_LORA_ALPHA,
189
+
190
+ LLM_KV_POSNET_EMBEDDING_LENGTH,
191
+ LLM_KV_POSNET_BLOCK_COUNT,
192
+
193
+ LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
194
+ LLM_KV_CONVNEXT_BLOCK_COUNT,
195
+
196
+ // deprecated: (NOTE(review): presumably superseded by the LLM_KV_TOKENIZER_FIM_* identifiers above - confirm)
197
+ LLM_KV_TOKENIZER_PREFIX_ID,
198
+ LLM_KV_TOKENIZER_SUFFIX_ID,
199
+ LLM_KV_TOKENIZER_MIDDLE_ID,
200
+ };
201
+
202
+ enum llm_tensor { // identifiers for model tensors; consumed by LLM_TN (name building) and llm_tensor_info_for (per-tensor info)
203
+ LLM_TENSOR_TOKEN_EMBD,
204
+ LLM_TENSOR_TOKEN_EMBD_NORM,
205
+ LLM_TENSOR_TOKEN_TYPES,
206
+ LLM_TENSOR_POS_EMBD,
207
+ LLM_TENSOR_OUTPUT,
208
+ LLM_TENSOR_OUTPUT_NORM,
209
+ LLM_TENSOR_ROPE_FREQS,
210
+ LLM_TENSOR_ROPE_FACTORS_LONG,
211
+ LLM_TENSOR_ROPE_FACTORS_SHORT,
212
+ LLM_TENSOR_ATTN_Q,
213
+ LLM_TENSOR_ATTN_K,
214
+ LLM_TENSOR_ATTN_V,
215
+ LLM_TENSOR_ATTN_QKV,
216
+ LLM_TENSOR_ATTN_OUT,
217
+ LLM_TENSOR_ATTN_NORM,
218
+ LLM_TENSOR_ATTN_NORM_2,
219
+ LLM_TENSOR_ATTN_OUT_NORM,
220
+ LLM_TENSOR_ATTN_POST_NORM,
221
+ LLM_TENSOR_ATTN_ROT_EMBD,
222
+ LLM_TENSOR_FFN_GATE_INP,
223
+ LLM_TENSOR_FFN_GATE_INP_SHEXP,
224
+ LLM_TENSOR_FFN_NORM,
225
+ LLM_TENSOR_FFN_POST_NORM,
226
+ LLM_TENSOR_FFN_GATE,
227
+ LLM_TENSOR_FFN_DOWN,
228
+ LLM_TENSOR_FFN_UP,
229
+ LLM_TENSOR_FFN_ACT,
230
+ LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
231
+ LLM_TENSOR_FFN_GATE_EXP,
232
+ LLM_TENSOR_FFN_UP_EXP,
233
+ LLM_TENSOR_FFN_NORM_EXPS,
234
+ LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
235
+ LLM_TENSOR_FFN_GATE_EXPS,
236
+ LLM_TENSOR_FFN_UP_EXPS,
237
+ LLM_TENSOR_FFN_DOWN_SHEXP,
238
+ LLM_TENSOR_FFN_GATE_SHEXP,
239
+ LLM_TENSOR_FFN_UP_SHEXP,
240
+ LLM_TENSOR_FFN_EXP_PROBS_B,
241
+ LLM_TENSOR_ATTN_Q_NORM,
242
+ LLM_TENSOR_ATTN_K_NORM,
243
+ LLM_TENSOR_LAYER_OUT_NORM,
244
+ LLM_TENSOR_SSM_IN,
245
+ LLM_TENSOR_SSM_CONV1D,
246
+ LLM_TENSOR_SSM_X,
247
+ LLM_TENSOR_SSM_DT,
248
+ LLM_TENSOR_SSM_A,
249
+ LLM_TENSOR_SSM_D,
250
+ LLM_TENSOR_SSM_OUT,
251
+ LLM_TENSOR_TIME_MIX_W1,
252
+ LLM_TENSOR_TIME_MIX_W2,
253
+ LLM_TENSOR_TIME_MIX_LERP_X,
254
+ LLM_TENSOR_TIME_MIX_LERP_W,
255
+ LLM_TENSOR_TIME_MIX_LERP_K,
256
+ LLM_TENSOR_TIME_MIX_LERP_V,
257
+ LLM_TENSOR_TIME_MIX_LERP_R,
258
+ LLM_TENSOR_TIME_MIX_LERP_G,
259
+ LLM_TENSOR_TIME_MIX_LERP_FUSED,
260
+ LLM_TENSOR_TIME_MIX_FIRST,
261
+ LLM_TENSOR_TIME_MIX_DECAY,
262
+ LLM_TENSOR_TIME_MIX_DECAY_W1,
263
+ LLM_TENSOR_TIME_MIX_DECAY_W2,
264
+ LLM_TENSOR_TIME_MIX_KEY,
265
+ LLM_TENSOR_TIME_MIX_VALUE,
266
+ LLM_TENSOR_TIME_MIX_RECEPTANCE,
267
+ LLM_TENSOR_TIME_MIX_GATE,
268
+ LLM_TENSOR_TIME_MIX_LN,
269
+ LLM_TENSOR_TIME_MIX_OUTPUT,
270
+ LLM_TENSOR_CHANNEL_MIX_LERP_K,
271
+ LLM_TENSOR_CHANNEL_MIX_LERP_R,
272
+ LLM_TENSOR_CHANNEL_MIX_KEY,
273
+ LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
274
+ LLM_TENSOR_CHANNEL_MIX_VALUE,
275
+ LLM_TENSOR_ATTN_Q_A,
276
+ LLM_TENSOR_ATTN_Q_B,
277
+ LLM_TENSOR_ATTN_KV_A_MQA,
278
+ LLM_TENSOR_ATTN_KV_B,
279
+ LLM_TENSOR_ATTN_Q_A_NORM,
280
+ LLM_TENSOR_ATTN_KV_A_NORM,
281
+ LLM_TENSOR_ATTN_SUB_NORM,
282
+ LLM_TENSOR_FFN_SUB_NORM,
283
+ LLM_TENSOR_DEC_ATTN_NORM,
284
+ LLM_TENSOR_DEC_ATTN_Q,
285
+ LLM_TENSOR_DEC_ATTN_K,
286
+ LLM_TENSOR_DEC_ATTN_V,
287
+ LLM_TENSOR_DEC_ATTN_OUT,
288
+ LLM_TENSOR_DEC_ATTN_REL_B,
289
+ LLM_TENSOR_DEC_CROSS_ATTN_NORM,
290
+ LLM_TENSOR_DEC_CROSS_ATTN_Q,
291
+ LLM_TENSOR_DEC_CROSS_ATTN_K,
292
+ LLM_TENSOR_DEC_CROSS_ATTN_V,
293
+ LLM_TENSOR_DEC_CROSS_ATTN_OUT,
294
+ LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
295
+ LLM_TENSOR_DEC_FFN_NORM,
296
+ LLM_TENSOR_DEC_FFN_GATE,
297
+ LLM_TENSOR_DEC_FFN_DOWN,
298
+ LLM_TENSOR_DEC_FFN_UP,
299
+ LLM_TENSOR_DEC_OUTPUT_NORM,
300
+ LLM_TENSOR_ENC_ATTN_NORM,
301
+ LLM_TENSOR_ENC_ATTN_Q,
302
+ LLM_TENSOR_ENC_ATTN_K,
303
+ LLM_TENSOR_ENC_ATTN_V,
304
+ LLM_TENSOR_ENC_ATTN_OUT,
305
+ LLM_TENSOR_ENC_ATTN_REL_B,
306
+ LLM_TENSOR_ENC_FFN_NORM,
307
+ LLM_TENSOR_ENC_FFN_GATE,
308
+ LLM_TENSOR_ENC_FFN_DOWN,
309
+ LLM_TENSOR_ENC_FFN_UP,
310
+ LLM_TENSOR_ENC_OUTPUT_NORM,
311
+ LLM_TENSOR_CLS,
312
+ LLM_TENSOR_CLS_OUT,
313
+ LLM_TENSOR_CONV1D,
314
+ LLM_TENSOR_CONVNEXT_DW,
315
+ LLM_TENSOR_CONVNEXT_NORM,
316
+ LLM_TENSOR_CONVNEXT_PW1,
317
+ LLM_TENSOR_CONVNEXT_PW2,
318
+ LLM_TENSOR_CONVNEXT_GAMMA,
319
+ LLM_TENSOR_POS_NET_CONV1,
320
+ LLM_TENSOR_POS_NET_CONV2,
321
+ LLM_TENSOR_POS_NET_NORM,
322
+ LLM_TENSOR_POS_NET_NORM1,
323
+ LLM_TENSOR_POS_NET_NORM2,
324
+ LLM_TENSOR_POS_NET_ATTN_NORM,
325
+ LLM_TENSOR_POS_NET_ATTN_Q,
326
+ LLM_TENSOR_POS_NET_ATTN_K,
327
+ LLM_TENSOR_POS_NET_ATTN_V,
328
+ LLM_TENSOR_POS_NET_ATTN_OUT,
329
+ };
330
+
331
+ enum llm_tensor_layer { // three-way classification of a tensor; stored in llm_tensor_info::layer
332
+ LLM_TENSOR_LAYER_INPUT, // NOTE(review): presumably tensors on the input side of the model - confirm against the tensor info table
333
+ LLM_TENSOR_LAYER_REPEATING, // NOTE(review): presumably tensors that occur once per block (the "blk.N." names) - confirm
334
+ LLM_TENSOR_LAYER_OUTPUT, // NOTE(review): presumably tensors on the output side of the model - confirm
335
+ };
336
+
337
+ struct LLM_KV { // callable helper: holds an architecture and maps an llm_kv identifier to a std::string
338
+ LLM_KV(llm_arch arch); // declared here, defined elsewhere; not explicit, so an llm_arch converts implicitly to LLM_KV
339
+
340
+ llm_arch arch; // architecture this helper was constructed for (public, mutable)
341
+
342
+ std::string operator()(llm_kv kv) const; // returns the string form of `kv`; definition not in this header (NOTE(review): presumably parameterized by `arch` - confirm)
343
+ };
344
+
345
+ // helper to handle gguf constants
346
+ // usage:
347
+ //
348
+ // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
349
+ //
350
+ // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
351
+ // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
352
+ // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
353
+ //
354
+ struct LLM_TN_IMPL { // value returned by LLM_TN::operator(): the ingredients of one tensor name, rendered on demand by str()
355
+ const llm_arch arch; // architecture copied from the LLM_TN that produced this object
356
+ const llm_tensor tensor; // which tensor the name refers to
357
+ const char * const suffix; // e.g. "weight" or "bias" in the usage notes; nullptr when LLM_TN's suffix-less overload is used
358
+ const int bid; // block index as in "blk.3.attn_norm.weight"; LLM_TN passes -1 by default
359
+ const int xid; // second index; LLM_TN passes -1 by default (NOTE(review): presumably an expert index - confirm in str())
360
+
361
+ std::string str() const; // builds the tensor name; declared here, defined elsewhere
362
+
363
+ operator std::string() const { // implicit conversion so the object can be used where a std::string is expected
364
+ return str();
365
+ }
366
+
367
+ friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) { // compares a string against the rendered name; only this operand order is declared here
368
+ return str == tn.str();
369
+ }
370
+
371
+ friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) { // negated counterpart of operator== above, same operand order
372
+ return str != tn.str();
373
+ }
374
+ };
375
+
376
+ struct LLM_TN { // tensor-name factory bound to one architecture; calling it yields an LLM_TN_IMPL
377
+ LLM_TN(llm_arch arch) : arch(arch) {} // stores the architecture; not explicit, so an llm_arch converts implicitly to LLM_TN
378
+
379
+ llm_arch arch; // architecture forwarded into every LLM_TN_IMPL this object creates
380
+
381
+ LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const { // overload with a suffix; the pointer is stored as-is, not copied, so it must outlive the returned object
382
+ return { arch, tensor, suffix, bid, xid };
383
+ }
384
+
385
+ LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const { // overload without a suffix; stores nullptr as the suffix
386
+ return { arch, tensor, nullptr, bid, xid };
387
+ }
388
+ };
389
+
390
+
391
+ struct llm_tensor_info { // per-tensor record returned by llm_tensor_info_for
392
+ llm_tensor_layer layer; // which llm_tensor_layer class the tensor belongs to
393
+ ggml_op op; // a ggml operation associated with the tensor (NOTE(review): presumably the op the tensor is used with - confirm in the implementation)
394
+ };
395
+
396
+ const char * llm_arch_name(llm_arch arch); // returns a C string for `arch`; declaration only (NOTE(review): lifetime of the returned pointer is not visible here - presumably static storage, confirm)
397
+
398
+ llm_arch llm_arch_from_string(const std::string & name); // maps a name to an llm_arch; declaration only (NOTE(review): result for an unrecognized name is presumably LLM_ARCH_UNKNOWN - confirm)
399
+
400
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor); // returns a reference to the llm_tensor_info for `tensor`; declaration only, so the referenced object must outlive the call (storage not visible here)