cui-llama.rn 1.3.5 → 1.4.0

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +22 -1
  2. package/android/src/main/CMakeLists.txt +25 -20
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +31 -9
  4. package/android/src/main/java/com/rnllama/RNLlama.java +98 -0
  5. package/android/src/main/jni-utils.h +94 -0
  6. package/android/src/main/jni.cpp +108 -37
  7. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +15 -0
  8. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +15 -0
  9. package/cpp/common.cpp +1982 -1965
  10. package/cpp/common.h +665 -657
  11. package/cpp/ggml-backend-reg.cpp +5 -0
  12. package/cpp/ggml-backend.cpp +5 -2
  13. package/cpp/ggml-cpp.h +1 -0
  14. package/cpp/ggml-cpu-aarch64.cpp +6 -1
  15. package/cpp/ggml-cpu-quants.c +5 -1
  16. package/cpp/ggml-cpu.c +14122 -14122
  17. package/cpp/ggml-cpu.cpp +627 -627
  18. package/cpp/ggml-impl.h +11 -16
  19. package/cpp/ggml-metal-impl.h +288 -0
  20. package/cpp/ggml-metal.m +2 -2
  21. package/cpp/ggml-opt.cpp +854 -0
  22. package/cpp/ggml-opt.h +216 -0
  23. package/cpp/ggml.c +0 -1276
  24. package/cpp/ggml.h +0 -140
  25. package/cpp/gguf.cpp +1325 -0
  26. package/cpp/gguf.h +202 -0
  27. package/cpp/llama-adapter.cpp +346 -0
  28. package/cpp/llama-adapter.h +73 -0
  29. package/cpp/llama-arch.cpp +1434 -0
  30. package/cpp/llama-arch.h +395 -0
  31. package/cpp/llama-batch.cpp +368 -0
  32. package/cpp/llama-batch.h +88 -0
  33. package/cpp/llama-chat.cpp +567 -0
  34. package/cpp/llama-chat.h +51 -0
  35. package/cpp/llama-context.cpp +1771 -0
  36. package/cpp/llama-context.h +128 -0
  37. package/cpp/llama-cparams.cpp +1 -0
  38. package/cpp/llama-cparams.h +37 -0
  39. package/cpp/llama-cpp.h +30 -0
  40. package/cpp/llama-grammar.cpp +1 -0
  41. package/cpp/llama-grammar.h +3 -1
  42. package/cpp/llama-hparams.cpp +71 -0
  43. package/cpp/llama-hparams.h +140 -0
  44. package/cpp/llama-impl.cpp +167 -0
  45. package/cpp/llama-impl.h +16 -136
  46. package/cpp/llama-kv-cache.cpp +718 -0
  47. package/cpp/llama-kv-cache.h +218 -0
  48. package/cpp/llama-mmap.cpp +589 -0
  49. package/cpp/llama-mmap.h +67 -0
  50. package/cpp/llama-model-loader.cpp +1011 -0
  51. package/cpp/llama-model-loader.h +158 -0
  52. package/cpp/llama-model.cpp +2202 -0
  53. package/cpp/llama-model.h +391 -0
  54. package/cpp/llama-sampling.cpp +117 -4
  55. package/cpp/llama-vocab.cpp +21 -28
  56. package/cpp/llama-vocab.h +13 -1
  57. package/cpp/llama.cpp +12547 -23528
  58. package/cpp/llama.h +31 -6
  59. package/cpp/rn-llama.hpp +90 -87
  60. package/cpp/sgemm.cpp +776 -70
  61. package/cpp/sgemm.h +14 -14
  62. package/cpp/unicode.cpp +6 -0
  63. package/ios/RNLlama.mm +47 -0
  64. package/ios/RNLlamaContext.h +3 -1
  65. package/ios/RNLlamaContext.mm +71 -14
  66. package/jest/mock.js +15 -3
  67. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  68. package/lib/commonjs/index.js +33 -37
  69. package/lib/commonjs/index.js.map +1 -1
  70. package/lib/module/NativeRNLlama.js.map +1 -1
  71. package/lib/module/index.js +31 -35
  72. package/lib/module/index.js.map +1 -1
  73. package/lib/typescript/NativeRNLlama.d.ts +26 -6
  74. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  75. package/lib/typescript/index.d.ts +21 -36
  76. package/lib/typescript/index.d.ts.map +1 -1
  77. package/llama-rn.podspec +4 -18
  78. package/package.json +2 -3
  79. package/src/NativeRNLlama.ts +32 -13
  80. package/src/index.ts +52 -47
package/cpp/llama-arch.h (new file)
@@ -0,0 +1,395 @@
+ #pragma once
+
+ #include "ggml.h" // lm_ggml_op
+
+ #include <string>
+
+ //
+ // gguf constants (sync with gguf.py)
+ //
+
+ enum llm_arch {
+     LLM_ARCH_LLAMA,
+     LLM_ARCH_DECI,
+     LLM_ARCH_FALCON,
+     LLM_ARCH_BAICHUAN,
+     LLM_ARCH_GROK,
+     LLM_ARCH_GPT2,
+     LLM_ARCH_GPTJ,
+     LLM_ARCH_GPTNEOX,
+     LLM_ARCH_MPT,
+     LLM_ARCH_STARCODER,
+     LLM_ARCH_REFACT,
+     LLM_ARCH_BERT,
+     LLM_ARCH_NOMIC_BERT,
+     LLM_ARCH_JINA_BERT_V2,
+     LLM_ARCH_BLOOM,
+     LLM_ARCH_STABLELM,
+     LLM_ARCH_QWEN,
+     LLM_ARCH_QWEN2,
+     LLM_ARCH_QWEN2MOE,
+     LLM_ARCH_QWEN2VL,
+     LLM_ARCH_PHI2,
+     LLM_ARCH_PHI3,
+     LLM_ARCH_PLAMO,
+     LLM_ARCH_CODESHELL,
+     LLM_ARCH_ORION,
+     LLM_ARCH_INTERNLM2,
+     LLM_ARCH_MINICPM,
+     LLM_ARCH_MINICPM3,
+     LLM_ARCH_GEMMA,
+     LLM_ARCH_GEMMA2,
+     LLM_ARCH_STARCODER2,
+     LLM_ARCH_MAMBA,
+     LLM_ARCH_XVERSE,
+     LLM_ARCH_COMMAND_R,
+     LLM_ARCH_COHERE2,
+     LLM_ARCH_DBRX,
+     LLM_ARCH_OLMO,
+     LLM_ARCH_OLMO2,
+     LLM_ARCH_OLMOE,
+     LLM_ARCH_OPENELM,
+     LLM_ARCH_ARCTIC,
+     LLM_ARCH_DEEPSEEK,
+     LLM_ARCH_DEEPSEEK2,
+     LLM_ARCH_CHATGLM,
+     LLM_ARCH_BITNET,
+     LLM_ARCH_T5,
+     LLM_ARCH_T5ENCODER,
+     LLM_ARCH_JAIS,
+     LLM_ARCH_NEMOTRON,
+     LLM_ARCH_EXAONE,
+     LLM_ARCH_RWKV6,
+     LLM_ARCH_GRANITE,
+     LLM_ARCH_GRANITE_MOE,
+     LLM_ARCH_CHAMELEON,
+     LLM_ARCH_WAVTOKENIZER_DEC,
+     LLM_ARCH_UNKNOWN,
+ };
+
+ enum llm_kv {
+     LLM_KV_GENERAL_TYPE,
+     LLM_KV_GENERAL_ARCHITECTURE,
+     LLM_KV_GENERAL_QUANTIZATION_VERSION,
+     LLM_KV_GENERAL_ALIGNMENT,
+     LLM_KV_GENERAL_NAME,
+     LLM_KV_GENERAL_AUTHOR,
+     LLM_KV_GENERAL_VERSION,
+     LLM_KV_GENERAL_URL,
+     LLM_KV_GENERAL_DESCRIPTION,
+     LLM_KV_GENERAL_LICENSE,
+     LLM_KV_GENERAL_SOURCE_URL,
+     LLM_KV_GENERAL_SOURCE_HF_REPO,
+
+     LLM_KV_VOCAB_SIZE,
+     LLM_KV_CONTEXT_LENGTH,
+     LLM_KV_EMBEDDING_LENGTH,
+     LLM_KV_FEATURES_LENGTH,
+     LLM_KV_BLOCK_COUNT,
+     LLM_KV_LEADING_DENSE_BLOCK_COUNT,
+     LLM_KV_FEED_FORWARD_LENGTH,
+     LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
+     LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
+     LLM_KV_USE_PARALLEL_RESIDUAL,
+     LLM_KV_TENSOR_DATA_LAYOUT,
+     LLM_KV_EXPERT_COUNT,
+     LLM_KV_EXPERT_USED_COUNT,
+     LLM_KV_EXPERT_SHARED_COUNT,
+     LLM_KV_EXPERT_WEIGHTS_SCALE,
+     LLM_KV_EXPERT_WEIGHTS_NORM,
+     LLM_KV_EXPERT_GATING_FUNC,
+     LLM_KV_POOLING_TYPE,
+     LLM_KV_LOGIT_SCALE,
+     LLM_KV_DECODER_START_TOKEN_ID,
+     LLM_KV_ATTN_LOGIT_SOFTCAPPING,
+     LLM_KV_FINAL_LOGIT_SOFTCAPPING,
+     LLM_KV_SWIN_NORM,
+     LLM_KV_RESCALE_EVERY_N_LAYERS,
+     LLM_KV_TIME_MIX_EXTRA_DIM,
+     LLM_KV_TIME_DECAY_EXTRA_DIM,
+     LLM_KV_RESIDUAL_SCALE,
+     LLM_KV_EMBEDDING_SCALE,
+
+     LLM_KV_ATTENTION_HEAD_COUNT,
+     LLM_KV_ATTENTION_HEAD_COUNT_KV,
+     LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
+     LLM_KV_ATTENTION_CLAMP_KQV,
+     LLM_KV_ATTENTION_KEY_LENGTH,
+     LLM_KV_ATTENTION_VALUE_LENGTH,
+     LLM_KV_ATTENTION_LAYERNORM_EPS,
+     LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
+     LLM_KV_ATTENTION_GROUPNORM_EPS,
+     LLM_KV_ATTENTION_GROUPNORM_GROUPS,
+     LLM_KV_ATTENTION_CAUSAL,
+     LLM_KV_ATTENTION_Q_LORA_RANK,
+     LLM_KV_ATTENTION_KV_LORA_RANK,
+     LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
+     LLM_KV_ATTENTION_SLIDING_WINDOW,
+     LLM_KV_ATTENTION_SCALE,
+
+     LLM_KV_ROPE_DIMENSION_COUNT,
+     LLM_KV_ROPE_DIMENSION_SECTIONS,
+     LLM_KV_ROPE_FREQ_BASE,
+     LLM_KV_ROPE_SCALE_LINEAR,
+     LLM_KV_ROPE_SCALING_TYPE,
+     LLM_KV_ROPE_SCALING_FACTOR,
+     LLM_KV_ROPE_SCALING_ATTN_FACTOR,
+     LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
+     LLM_KV_ROPE_SCALING_FINETUNED,
+     LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
+
+     LLM_KV_SPLIT_NO,
+     LLM_KV_SPLIT_COUNT,
+     LLM_KV_SPLIT_TENSORS_COUNT,
+
+     LLM_KV_SSM_INNER_SIZE,
+     LLM_KV_SSM_CONV_KERNEL,
+     LLM_KV_SSM_STATE_SIZE,
+     LLM_KV_SSM_TIME_STEP_RANK,
+     LLM_KV_SSM_DT_B_C_RMS,
+
+     LLM_KV_WKV_HEAD_SIZE,
+
+     LLM_KV_TOKENIZER_MODEL,
+     LLM_KV_TOKENIZER_PRE,
+     LLM_KV_TOKENIZER_LIST,
+     LLM_KV_TOKENIZER_TOKEN_TYPE,
+     LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
+     LLM_KV_TOKENIZER_SCORES,
+     LLM_KV_TOKENIZER_MERGES,
+     LLM_KV_TOKENIZER_BOS_ID,
+     LLM_KV_TOKENIZER_EOS_ID,
+     LLM_KV_TOKENIZER_EOT_ID,
+     LLM_KV_TOKENIZER_EOM_ID,
+     LLM_KV_TOKENIZER_UNK_ID,
+     LLM_KV_TOKENIZER_SEP_ID,
+     LLM_KV_TOKENIZER_PAD_ID,
+     LLM_KV_TOKENIZER_CLS_ID,
+     LLM_KV_TOKENIZER_MASK_ID,
+     LLM_KV_TOKENIZER_ADD_BOS,
+     LLM_KV_TOKENIZER_ADD_EOS,
+     LLM_KV_TOKENIZER_ADD_PREFIX,
+     LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
+     LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
+     LLM_KV_TOKENIZER_HF_JSON,
+     LLM_KV_TOKENIZER_RWKV,
+     LLM_KV_TOKENIZER_FIM_PRE_ID,
+     LLM_KV_TOKENIZER_FIM_SUF_ID,
+     LLM_KV_TOKENIZER_FIM_MID_ID,
+     LLM_KV_TOKENIZER_FIM_PAD_ID,
+     LLM_KV_TOKENIZER_FIM_REP_ID,
+     LLM_KV_TOKENIZER_FIM_SEP_ID,
+
+     LLM_KV_ADAPTER_TYPE,
+     LLM_KV_ADAPTER_LORA_ALPHA,
+
+     LLM_KV_POSNET_EMBEDDING_LENGTH,
+     LLM_KV_POSNET_BLOCK_COUNT,
+
+     LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
+     LLM_KV_CONVNEXT_BLOCK_COUNT,
+
+     // deprecated:
+     LLM_KV_TOKENIZER_PREFIX_ID,
+     LLM_KV_TOKENIZER_SUFFIX_ID,
+     LLM_KV_TOKENIZER_MIDDLE_ID,
+ };
+
+ enum llm_tensor {
+     LLM_TENSOR_TOKEN_EMBD,
+     LLM_TENSOR_TOKEN_EMBD_NORM,
+     LLM_TENSOR_TOKEN_TYPES,
+     LLM_TENSOR_POS_EMBD,
+     LLM_TENSOR_OUTPUT,
+     LLM_TENSOR_OUTPUT_NORM,
+     LLM_TENSOR_ROPE_FREQS,
+     LLM_TENSOR_ROPE_FACTORS_LONG,
+     LLM_TENSOR_ROPE_FACTORS_SHORT,
+     LLM_TENSOR_ATTN_Q,
+     LLM_TENSOR_ATTN_K,
+     LLM_TENSOR_ATTN_V,
+     LLM_TENSOR_ATTN_QKV,
+     LLM_TENSOR_ATTN_OUT,
+     LLM_TENSOR_ATTN_NORM,
+     LLM_TENSOR_ATTN_NORM_2,
+     LLM_TENSOR_ATTN_OUT_NORM,
+     LLM_TENSOR_ATTN_POST_NORM,
+     LLM_TENSOR_ATTN_ROT_EMBD,
+     LLM_TENSOR_FFN_GATE_INP,
+     LLM_TENSOR_FFN_GATE_INP_SHEXP,
+     LLM_TENSOR_FFN_NORM,
+     LLM_TENSOR_FFN_POST_NORM,
+     LLM_TENSOR_FFN_GATE,
+     LLM_TENSOR_FFN_DOWN,
+     LLM_TENSOR_FFN_UP,
+     LLM_TENSOR_FFN_ACT,
+     LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility
+     LLM_TENSOR_FFN_GATE_EXP,
+     LLM_TENSOR_FFN_UP_EXP,
+     LLM_TENSOR_FFN_NORM_EXPS,
+     LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
+     LLM_TENSOR_FFN_GATE_EXPS,
+     LLM_TENSOR_FFN_UP_EXPS,
+     LLM_TENSOR_FFN_DOWN_SHEXP,
+     LLM_TENSOR_FFN_GATE_SHEXP,
+     LLM_TENSOR_FFN_UP_SHEXP,
+     LLM_TENSOR_FFN_EXP_PROBS_B,
+     LLM_TENSOR_ATTN_Q_NORM,
+     LLM_TENSOR_ATTN_K_NORM,
+     LLM_TENSOR_LAYER_OUT_NORM,
+     LLM_TENSOR_SSM_IN,
+     LLM_TENSOR_SSM_CONV1D,
+     LLM_TENSOR_SSM_X,
+     LLM_TENSOR_SSM_DT,
+     LLM_TENSOR_SSM_A,
+     LLM_TENSOR_SSM_D,
+     LLM_TENSOR_SSM_OUT,
+     LLM_TENSOR_TIME_MIX_W1,
+     LLM_TENSOR_TIME_MIX_W2,
+     LLM_TENSOR_TIME_MIX_LERP_X,
+     LLM_TENSOR_TIME_MIX_LERP_W,
+     LLM_TENSOR_TIME_MIX_LERP_K,
+     LLM_TENSOR_TIME_MIX_LERP_V,
+     LLM_TENSOR_TIME_MIX_LERP_R,
+     LLM_TENSOR_TIME_MIX_LERP_G,
+     LLM_TENSOR_TIME_MIX_FIRST,
+     LLM_TENSOR_TIME_MIX_DECAY,
+     LLM_TENSOR_TIME_MIX_DECAY_W1,
+     LLM_TENSOR_TIME_MIX_DECAY_W2,
+     LLM_TENSOR_TIME_MIX_KEY,
+     LLM_TENSOR_TIME_MIX_VALUE,
+     LLM_TENSOR_TIME_MIX_RECEPTANCE,
+     LLM_TENSOR_TIME_MIX_GATE,
+     LLM_TENSOR_TIME_MIX_LN,
+     LLM_TENSOR_TIME_MIX_OUTPUT,
+     LLM_TENSOR_CHANNEL_MIX_LERP_K,
+     LLM_TENSOR_CHANNEL_MIX_LERP_R,
+     LLM_TENSOR_CHANNEL_MIX_KEY,
+     LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
+     LLM_TENSOR_CHANNEL_MIX_VALUE,
+     LLM_TENSOR_ATTN_Q_A,
+     LLM_TENSOR_ATTN_Q_B,
+     LLM_TENSOR_ATTN_KV_A_MQA,
+     LLM_TENSOR_ATTN_KV_B,
+     LLM_TENSOR_ATTN_Q_A_NORM,
+     LLM_TENSOR_ATTN_KV_A_NORM,
+     LLM_TENSOR_ATTN_SUB_NORM,
+     LLM_TENSOR_FFN_SUB_NORM,
+     LLM_TENSOR_DEC_ATTN_NORM,
+     LLM_TENSOR_DEC_ATTN_Q,
+     LLM_TENSOR_DEC_ATTN_K,
+     LLM_TENSOR_DEC_ATTN_V,
+     LLM_TENSOR_DEC_ATTN_OUT,
+     LLM_TENSOR_DEC_ATTN_REL_B,
+     LLM_TENSOR_DEC_CROSS_ATTN_NORM,
+     LLM_TENSOR_DEC_CROSS_ATTN_Q,
+     LLM_TENSOR_DEC_CROSS_ATTN_K,
+     LLM_TENSOR_DEC_CROSS_ATTN_V,
+     LLM_TENSOR_DEC_CROSS_ATTN_OUT,
+     LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
+     LLM_TENSOR_DEC_FFN_NORM,
+     LLM_TENSOR_DEC_FFN_GATE,
+     LLM_TENSOR_DEC_FFN_DOWN,
+     LLM_TENSOR_DEC_FFN_UP,
+     LLM_TENSOR_DEC_OUTPUT_NORM,
+     LLM_TENSOR_ENC_ATTN_NORM,
+     LLM_TENSOR_ENC_ATTN_Q,
+     LLM_TENSOR_ENC_ATTN_K,
+     LLM_TENSOR_ENC_ATTN_V,
+     LLM_TENSOR_ENC_ATTN_OUT,
+     LLM_TENSOR_ENC_ATTN_REL_B,
+     LLM_TENSOR_ENC_FFN_NORM,
+     LLM_TENSOR_ENC_FFN_GATE,
+     LLM_TENSOR_ENC_FFN_DOWN,
+     LLM_TENSOR_ENC_FFN_UP,
+     LLM_TENSOR_ENC_OUTPUT_NORM,
+     LLM_TENSOR_CLS,
+     LLM_TENSOR_CLS_OUT,
+     LLM_TENSOR_CONV1D,
+     LLM_TENSOR_CONVNEXT_DW,
+     LLM_TENSOR_CONVNEXT_NORM,
+     LLM_TENSOR_CONVNEXT_PW1,
+     LLM_TENSOR_CONVNEXT_PW2,
+     LLM_TENSOR_CONVNEXT_GAMMA,
+     LLM_TENSOR_POS_NET_CONV1,
+     LLM_TENSOR_POS_NET_CONV2,
+     LLM_TENSOR_POS_NET_NORM,
+     LLM_TENSOR_POS_NET_NORM1,
+     LLM_TENSOR_POS_NET_NORM2,
+     LLM_TENSOR_POS_NET_ATTN_NORM,
+     LLM_TENSOR_POS_NET_ATTN_Q,
+     LLM_TENSOR_POS_NET_ATTN_K,
+     LLM_TENSOR_POS_NET_ATTN_V,
+     LLM_TENSOR_POS_NET_ATTN_OUT,
+ };
+
+ enum llm_tensor_layer {
+     LLM_TENSOR_LAYER_INPUT,
+     LLM_TENSOR_LAYER_REPEATING,
+     LLM_TENSOR_LAYER_OUTPUT,
+ };
+
+ struct LLM_KV {
+     LLM_KV(llm_arch arch);
+
+     llm_arch arch;
+
+     std::string operator()(llm_kv kv) const;
+ };
+
+ // helper to handle gguf constants
+ // usage:
+ //
+ //   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
+ //
+ //   std::string name = tn(LLM_TENSOR_OUTPUT);              -> "output"
+ //   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");  -> "token_embd.bias"
+ //   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
+ //
+ struct LLM_TN_IMPL {
+     const llm_arch arch;
+     const llm_tensor tensor;
+     const char * const suffix;
+     const int bid;
+     const int xid;
+
+     std::string str() const;
+
+     operator std::string() const {
+         return str();
+     }
+
+     friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
+         return str == tn.str();
+     }
+
+     friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
+         return str != tn.str();
+     }
+ };
+
+ struct LLM_TN {
+     LLM_TN(llm_arch arch) : arch(arch) {}
+
+     llm_arch arch;
+
+     LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
+         return { arch, tensor, suffix, bid, xid };
+     }
+
+     LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
+         return { arch, tensor, nullptr, bid, xid };
+     }
+ };
+
+
+ struct llm_tensor_info {
+     llm_tensor_layer layer;
+     lm_ggml_op op;
+ };
+
+ const char * llm_arch_name(llm_arch arch);
+
+ llm_arch llm_arch_from_string(const std::string & name);
+
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
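
Note: the hunk above is the new constants header package/cpp/llama-arch.h from the file list, which this release splits out of the previously monolithic llama.cpp sources. The snippet below is not part of the diff; it is a minimal, illustrative sketch of how the LLM_TN name helper and the architecture string helpers declared in that header can be used, following the header's own usage comment. It assumes the header is compiled and linked together with the package's llama-arch.cpp implementation; the expected name strings are taken from the usage comment, and the round-trip behavior of the arch helpers is an assumption based on their declarations.

#include <iostream>
#include <string>

#include "llama-arch.h" // assumed include path within package/cpp

int main() {
    // Tensor-name helper, following the usage comment in the header.
    const auto tn = LLM_TN(LLM_ARCH_LLAMA);

    const std::string output    = tn(LLM_TENSOR_OUTPUT);                 // "output"
    const std::string embd_bias = tn(LLM_TENSOR_TOKEN_EMBD, "bias");     // "token_embd.bias"
    const std::string norm_blk3 = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); // "blk.3.attn_norm.weight"

    std::cout << output << "\n" << embd_bias << "\n" << norm_blk3 << "\n";

    // The friend comparison operators allow checking a name without an explicit str() call.
    if (std::string("token_embd.bias") == tn(LLM_TENSOR_TOKEN_EMBD, "bias")) {
        std::cout << "token_embd.bias matched\n";
    }

    // Architecture <-> string helpers; assumed to round-trip for known architectures.
    const char * arch_name = llm_arch_name(LLM_ARCH_LLAMA);
    std::cout << arch_name << " -> "
              << (llm_arch_from_string(arch_name) == LLM_ARCH_LLAMA ? "round-trips" : "mismatch")
              << "\n";

    return 0;
}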