cui-llama.rn 1.4.0 → 1.4.1
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +163 -60
- package/cpp/common.h +43 -12
- package/cpp/ggml-alloc.c +1042 -1037
- package/cpp/ggml-backend-impl.h +255 -256
- package/cpp/ggml-backend-reg.cpp +582 -582
- package/cpp/ggml-backend.cpp +2002 -2002
- package/cpp/ggml-backend.h +354 -352
- package/cpp/ggml-common.h +1853 -1853
- package/cpp/ggml-cpp.h +39 -39
- package/cpp/ggml-cpu-aarch64.cpp +4247 -4247
- package/cpp/ggml-cpu-aarch64.h +8 -8
- package/cpp/ggml-cpu-impl.h +386 -386
- package/cpp/ggml-cpu-quants.c +10920 -10839
- package/cpp/ggml-cpu-traits.cpp +36 -36
- package/cpp/ggml-cpu-traits.h +38 -38
- package/cpp/ggml-cpu.c +329 -60
- package/cpp/ggml-cpu.cpp +10 -2
- package/cpp/ggml-cpu.h +135 -135
- package/cpp/ggml-impl.h +567 -567
- package/cpp/ggml-metal-impl.h +17 -17
- package/cpp/ggml-metal.m +4884 -4884
- package/cpp/ggml-quants.c +5238 -5238
- package/cpp/ggml-threading.h +14 -14
- package/cpp/ggml.c +6514 -6448
- package/cpp/ggml.h +2194 -2163
- package/cpp/gguf.cpp +1329 -1325
- package/cpp/gguf.h +202 -202
- package/cpp/json-schema-to-grammar.cpp +1045 -1045
- package/cpp/json-schema-to-grammar.h +8 -8
- package/cpp/json.hpp +24766 -24766
- package/cpp/llama-adapter.cpp +347 -346
- package/cpp/llama-adapter.h +74 -73
- package/cpp/llama-arch.cpp +1487 -1434
- package/cpp/llama-arch.h +400 -395
- package/cpp/llama-batch.cpp +368 -368
- package/cpp/llama-batch.h +88 -88
- package/cpp/llama-chat.cpp +578 -567
- package/cpp/llama-chat.h +52 -51
- package/cpp/llama-context.cpp +1775 -1771
- package/cpp/llama-context.h +128 -128
- package/cpp/llama-cparams.cpp +1 -1
- package/cpp/llama-cparams.h +37 -37
- package/cpp/llama-cpp.h +30 -30
- package/cpp/llama-grammar.cpp +1139 -1139
- package/cpp/llama-grammar.h +143 -143
- package/cpp/llama-hparams.cpp +71 -71
- package/cpp/llama-hparams.h +139 -140
- package/cpp/llama-impl.cpp +167 -167
- package/cpp/llama-impl.h +61 -61
- package/cpp/llama-kv-cache.cpp +718 -718
- package/cpp/llama-kv-cache.h +218 -218
- package/cpp/llama-mmap.cpp +2 -1
- package/cpp/llama-mmap.h +67 -67
- package/cpp/llama-model-loader.cpp +1124 -1011
- package/cpp/llama-model-loader.h +167 -158
- package/cpp/llama-model.cpp +3997 -2202
- package/cpp/llama-model.h +370 -391
- package/cpp/llama-sampling.cpp +2408 -2406
- package/cpp/llama-sampling.h +32 -48
- package/cpp/llama-vocab.cpp +3247 -1982
- package/cpp/llama-vocab.h +125 -182
- package/cpp/llama.cpp +416 -2886
- package/cpp/llama.h +1323 -1285
- package/cpp/log.cpp +401 -401
- package/cpp/log.h +121 -121
- package/cpp/rn-llama.hpp +18 -12
- package/cpp/sampling.cpp +505 -500
- package/cpp/sgemm.cpp +2597 -2597
- package/cpp/speculative.cpp +277 -274
- package/cpp/speculative.h +28 -28
- package/cpp/unicode.cpp +2 -3
- package/package.json +1 -1
package/cpp/llama-arch.h
CHANGED
@@ -1,395 +1,400 @@
-#pragma once
-
-#include "ggml.h" // lm_ggml_op
-
-#include <string>
-
-//
-// gguf constants (sync with gguf.py)
-//
-
-enum llm_arch {
-    LLM_ARCH_LLAMA,
-    LLM_ARCH_DECI,
-    LLM_ARCH_FALCON,
-    LLM_ARCH_BAICHUAN,
-    LLM_ARCH_GROK,
-    LLM_ARCH_GPT2,
-    LLM_ARCH_GPTJ,
-    LLM_ARCH_GPTNEOX,
-    LLM_ARCH_MPT,
-    LLM_ARCH_STARCODER,
-    LLM_ARCH_REFACT,
-    LLM_ARCH_BERT,
-    LLM_ARCH_NOMIC_BERT,
-    LLM_ARCH_JINA_BERT_V2,
-    LLM_ARCH_BLOOM,
-    LLM_ARCH_STABLELM,
-    LLM_ARCH_QWEN,
-    LLM_ARCH_QWEN2,
-    LLM_ARCH_QWEN2MOE,
-    LLM_ARCH_QWEN2VL,
-    LLM_ARCH_PHI2,
-    LLM_ARCH_PHI3,
[removed old lines 34-395 were not rendered by the diff viewer; only four bare "//" comment lines (old lines 345-348) and closing braces at old lines 369 and 383 were captured]
+#pragma once
+
+#include "ggml.h" // lm_ggml_op
+
+#include <string>
+
+//
+// gguf constants (sync with gguf.py)
+//
+
+enum llm_arch {
+    LLM_ARCH_LLAMA,
+    LLM_ARCH_DECI,
+    LLM_ARCH_FALCON,
+    LLM_ARCH_BAICHUAN,
+    LLM_ARCH_GROK,
+    LLM_ARCH_GPT2,
+    LLM_ARCH_GPTJ,
+    LLM_ARCH_GPTNEOX,
+    LLM_ARCH_MPT,
+    LLM_ARCH_STARCODER,
+    LLM_ARCH_REFACT,
+    LLM_ARCH_BERT,
+    LLM_ARCH_NOMIC_BERT,
+    LLM_ARCH_JINA_BERT_V2,
+    LLM_ARCH_BLOOM,
+    LLM_ARCH_STABLELM,
+    LLM_ARCH_QWEN,
+    LLM_ARCH_QWEN2,
+    LLM_ARCH_QWEN2MOE,
+    LLM_ARCH_QWEN2VL,
+    LLM_ARCH_PHI2,
+    LLM_ARCH_PHI3,
+    LLM_ARCH_PHIMOE,
+    LLM_ARCH_PLAMO,
+    LLM_ARCH_CODESHELL,
+    LLM_ARCH_ORION,
+    LLM_ARCH_INTERNLM2,
+    LLM_ARCH_MINICPM,
+    LLM_ARCH_MINICPM3,
+    LLM_ARCH_GEMMA,
+    LLM_ARCH_GEMMA2,
+    LLM_ARCH_STARCODER2,
+    LLM_ARCH_MAMBA,
+    LLM_ARCH_XVERSE,
+    LLM_ARCH_COMMAND_R,
+    LLM_ARCH_COHERE2,
+    LLM_ARCH_DBRX,
+    LLM_ARCH_OLMO,
+    LLM_ARCH_OLMO2,
+    LLM_ARCH_OLMOE,
+    LLM_ARCH_OPENELM,
+    LLM_ARCH_ARCTIC,
+    LLM_ARCH_DEEPSEEK,
+    LLM_ARCH_DEEPSEEK2,
+    LLM_ARCH_CHATGLM,
+    LLM_ARCH_BITNET,
+    LLM_ARCH_T5,
+    LLM_ARCH_T5ENCODER,
+    LLM_ARCH_JAIS,
+    LLM_ARCH_NEMOTRON,
+    LLM_ARCH_EXAONE,
+    LLM_ARCH_RWKV6,
+    LLM_ARCH_RWKV6QWEN2,
+    LLM_ARCH_GRANITE,
+    LLM_ARCH_GRANITE_MOE,
+    LLM_ARCH_CHAMELEON,
+    LLM_ARCH_WAVTOKENIZER_DEC,
+    LLM_ARCH_UNKNOWN,
+};
+
+enum llm_kv {
+    LLM_KV_GENERAL_TYPE,
+    LLM_KV_GENERAL_ARCHITECTURE,
+    LLM_KV_GENERAL_QUANTIZATION_VERSION,
+    LLM_KV_GENERAL_ALIGNMENT,
+    LLM_KV_GENERAL_NAME,
+    LLM_KV_GENERAL_AUTHOR,
+    LLM_KV_GENERAL_VERSION,
+    LLM_KV_GENERAL_URL,
+    LLM_KV_GENERAL_DESCRIPTION,
+    LLM_KV_GENERAL_LICENSE,
+    LLM_KV_GENERAL_SOURCE_URL,
+    LLM_KV_GENERAL_SOURCE_HF_REPO,
+
+    LLM_KV_VOCAB_SIZE,
+    LLM_KV_CONTEXT_LENGTH,
+    LLM_KV_EMBEDDING_LENGTH,
+    LLM_KV_FEATURES_LENGTH,
+    LLM_KV_BLOCK_COUNT,
+    LLM_KV_LEADING_DENSE_BLOCK_COUNT,
+    LLM_KV_FEED_FORWARD_LENGTH,
+    LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
+    LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
+    LLM_KV_USE_PARALLEL_RESIDUAL,
+    LLM_KV_TENSOR_DATA_LAYOUT,
+    LLM_KV_EXPERT_COUNT,
+    LLM_KV_EXPERT_USED_COUNT,
+    LLM_KV_EXPERT_SHARED_COUNT,
+    LLM_KV_EXPERT_WEIGHTS_SCALE,
+    LLM_KV_EXPERT_WEIGHTS_NORM,
+    LLM_KV_EXPERT_GATING_FUNC,
+    LLM_KV_POOLING_TYPE,
+    LLM_KV_LOGIT_SCALE,
+    LLM_KV_DECODER_START_TOKEN_ID,
+    LLM_KV_ATTN_LOGIT_SOFTCAPPING,
+    LLM_KV_FINAL_LOGIT_SOFTCAPPING,
+    LLM_KV_SWIN_NORM,
+    LLM_KV_RESCALE_EVERY_N_LAYERS,
+    LLM_KV_TIME_MIX_EXTRA_DIM,
+    LLM_KV_TIME_DECAY_EXTRA_DIM,
+    LLM_KV_RESIDUAL_SCALE,
+    LLM_KV_EMBEDDING_SCALE,
+    LLM_KV_TOKEN_SHIFT_COUNT,
+
+    LLM_KV_ATTENTION_HEAD_COUNT,
+    LLM_KV_ATTENTION_HEAD_COUNT_KV,
+    LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
+    LLM_KV_ATTENTION_CLAMP_KQV,
+    LLM_KV_ATTENTION_KEY_LENGTH,
+    LLM_KV_ATTENTION_VALUE_LENGTH,
+    LLM_KV_ATTENTION_LAYERNORM_EPS,
+    LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
+    LLM_KV_ATTENTION_GROUPNORM_EPS,
+    LLM_KV_ATTENTION_GROUPNORM_GROUPS,
+    LLM_KV_ATTENTION_CAUSAL,
+    LLM_KV_ATTENTION_Q_LORA_RANK,
+    LLM_KV_ATTENTION_KV_LORA_RANK,
+    LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
+    LLM_KV_ATTENTION_SLIDING_WINDOW,
+    LLM_KV_ATTENTION_SCALE,
+
+    LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
+    LLM_KV_ROPE_FREQ_BASE,
+    LLM_KV_ROPE_SCALE_LINEAR,
+    LLM_KV_ROPE_SCALING_TYPE,
+    LLM_KV_ROPE_SCALING_FACTOR,
+    LLM_KV_ROPE_SCALING_ATTN_FACTOR,
+    LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
+    LLM_KV_ROPE_SCALING_FINETUNED,
+    LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
+
+    LLM_KV_SPLIT_NO,
+    LLM_KV_SPLIT_COUNT,
+    LLM_KV_SPLIT_TENSORS_COUNT,
+
+    LLM_KV_SSM_INNER_SIZE,
+    LLM_KV_SSM_CONV_KERNEL,
+    LLM_KV_SSM_STATE_SIZE,
+    LLM_KV_SSM_TIME_STEP_RANK,
+    LLM_KV_SSM_DT_B_C_RMS,
+
+    LLM_KV_WKV_HEAD_SIZE,
+
+    LLM_KV_TOKENIZER_MODEL,
+    LLM_KV_TOKENIZER_PRE,
+    LLM_KV_TOKENIZER_LIST,
+    LLM_KV_TOKENIZER_TOKEN_TYPE,
+    LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
+    LLM_KV_TOKENIZER_SCORES,
+    LLM_KV_TOKENIZER_MERGES,
+    LLM_KV_TOKENIZER_BOS_ID,
+    LLM_KV_TOKENIZER_EOS_ID,
+    LLM_KV_TOKENIZER_EOT_ID,
+    LLM_KV_TOKENIZER_EOM_ID,
+    LLM_KV_TOKENIZER_UNK_ID,
+    LLM_KV_TOKENIZER_SEP_ID,
+    LLM_KV_TOKENIZER_PAD_ID,
+    LLM_KV_TOKENIZER_CLS_ID,
+    LLM_KV_TOKENIZER_MASK_ID,
+    LLM_KV_TOKENIZER_ADD_BOS,
+    LLM_KV_TOKENIZER_ADD_EOS,
+    LLM_KV_TOKENIZER_ADD_PREFIX,
+    LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
+    LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
+    LLM_KV_TOKENIZER_HF_JSON,
+    LLM_KV_TOKENIZER_RWKV,
+    LLM_KV_TOKENIZER_CHAT_TEMPLATE,
+    LLM_KV_TOKENIZER_FIM_PRE_ID,
+    LLM_KV_TOKENIZER_FIM_SUF_ID,
+    LLM_KV_TOKENIZER_FIM_MID_ID,
+    LLM_KV_TOKENIZER_FIM_PAD_ID,
+    LLM_KV_TOKENIZER_FIM_REP_ID,
+    LLM_KV_TOKENIZER_FIM_SEP_ID,
+
+    LLM_KV_ADAPTER_TYPE,
+    LLM_KV_ADAPTER_LORA_ALPHA,
+
+    LLM_KV_POSNET_EMBEDDING_LENGTH,
+    LLM_KV_POSNET_BLOCK_COUNT,
+
+    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
+    LLM_KV_CONVNEXT_BLOCK_COUNT,
+
+    // deprecated:
+    LLM_KV_TOKENIZER_PREFIX_ID,
+    LLM_KV_TOKENIZER_SUFFIX_ID,
+    LLM_KV_TOKENIZER_MIDDLE_ID,
+};
+
+enum llm_tensor {
+    LLM_TENSOR_TOKEN_EMBD,
+    LLM_TENSOR_TOKEN_EMBD_NORM,
+    LLM_TENSOR_TOKEN_TYPES,
+    LLM_TENSOR_POS_EMBD,
+    LLM_TENSOR_OUTPUT,
+    LLM_TENSOR_OUTPUT_NORM,
+    LLM_TENSOR_ROPE_FREQS,
+    LLM_TENSOR_ROPE_FACTORS_LONG,
+    LLM_TENSOR_ROPE_FACTORS_SHORT,
+    LLM_TENSOR_ATTN_Q,
+    LLM_TENSOR_ATTN_K,
+    LLM_TENSOR_ATTN_V,
+    LLM_TENSOR_ATTN_QKV,
+    LLM_TENSOR_ATTN_OUT,
+    LLM_TENSOR_ATTN_NORM,
+    LLM_TENSOR_ATTN_NORM_2,
+    LLM_TENSOR_ATTN_OUT_NORM,
+    LLM_TENSOR_ATTN_POST_NORM,
+    LLM_TENSOR_ATTN_ROT_EMBD,
+    LLM_TENSOR_FFN_GATE_INP,
+    LLM_TENSOR_FFN_GATE_INP_SHEXP,
+    LLM_TENSOR_FFN_NORM,
+    LLM_TENSOR_FFN_POST_NORM,
+    LLM_TENSOR_FFN_GATE,
+    LLM_TENSOR_FFN_DOWN,
+    LLM_TENSOR_FFN_UP,
+    LLM_TENSOR_FFN_ACT,
+    LLM_TENSOR_FFN_DOWN_EXP,  // split experts for backward compatibility
+    LLM_TENSOR_FFN_GATE_EXP,
+    LLM_TENSOR_FFN_UP_EXP,
+    LLM_TENSOR_FFN_NORM_EXPS,
+    LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
+    LLM_TENSOR_FFN_GATE_EXPS,
+    LLM_TENSOR_FFN_UP_EXPS,
+    LLM_TENSOR_FFN_DOWN_SHEXP,
+    LLM_TENSOR_FFN_GATE_SHEXP,
+    LLM_TENSOR_FFN_UP_SHEXP,
+    LLM_TENSOR_FFN_EXP_PROBS_B,
+    LLM_TENSOR_ATTN_Q_NORM,
+    LLM_TENSOR_ATTN_K_NORM,
+    LLM_TENSOR_LAYER_OUT_NORM,
+    LLM_TENSOR_SSM_IN,
+    LLM_TENSOR_SSM_CONV1D,
+    LLM_TENSOR_SSM_X,
+    LLM_TENSOR_SSM_DT,
+    LLM_TENSOR_SSM_A,
+    LLM_TENSOR_SSM_D,
+    LLM_TENSOR_SSM_OUT,
+    LLM_TENSOR_TIME_MIX_W1,
+    LLM_TENSOR_TIME_MIX_W2,
+    LLM_TENSOR_TIME_MIX_LERP_X,
+    LLM_TENSOR_TIME_MIX_LERP_W,
+    LLM_TENSOR_TIME_MIX_LERP_K,
+    LLM_TENSOR_TIME_MIX_LERP_V,
+    LLM_TENSOR_TIME_MIX_LERP_R,
+    LLM_TENSOR_TIME_MIX_LERP_G,
+    LLM_TENSOR_TIME_MIX_LERP_FUSED,
+    LLM_TENSOR_TIME_MIX_FIRST,
+    LLM_TENSOR_TIME_MIX_DECAY,
+    LLM_TENSOR_TIME_MIX_DECAY_W1,
+    LLM_TENSOR_TIME_MIX_DECAY_W2,
+    LLM_TENSOR_TIME_MIX_KEY,
+    LLM_TENSOR_TIME_MIX_VALUE,
+    LLM_TENSOR_TIME_MIX_RECEPTANCE,
+    LLM_TENSOR_TIME_MIX_GATE,
+    LLM_TENSOR_TIME_MIX_LN,
+    LLM_TENSOR_TIME_MIX_OUTPUT,
+    LLM_TENSOR_CHANNEL_MIX_LERP_K,
+    LLM_TENSOR_CHANNEL_MIX_LERP_R,
+    LLM_TENSOR_CHANNEL_MIX_KEY,
+    LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
+    LLM_TENSOR_CHANNEL_MIX_VALUE,
+    LLM_TENSOR_ATTN_Q_A,
+    LLM_TENSOR_ATTN_Q_B,
+    LLM_TENSOR_ATTN_KV_A_MQA,
+    LLM_TENSOR_ATTN_KV_B,
+    LLM_TENSOR_ATTN_Q_A_NORM,
+    LLM_TENSOR_ATTN_KV_A_NORM,
+    LLM_TENSOR_ATTN_SUB_NORM,
+    LLM_TENSOR_FFN_SUB_NORM,
+    LLM_TENSOR_DEC_ATTN_NORM,
+    LLM_TENSOR_DEC_ATTN_Q,
+    LLM_TENSOR_DEC_ATTN_K,
+    LLM_TENSOR_DEC_ATTN_V,
+    LLM_TENSOR_DEC_ATTN_OUT,
+    LLM_TENSOR_DEC_ATTN_REL_B,
+    LLM_TENSOR_DEC_CROSS_ATTN_NORM,
+    LLM_TENSOR_DEC_CROSS_ATTN_Q,
+    LLM_TENSOR_DEC_CROSS_ATTN_K,
+    LLM_TENSOR_DEC_CROSS_ATTN_V,
+    LLM_TENSOR_DEC_CROSS_ATTN_OUT,
+    LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
+    LLM_TENSOR_DEC_FFN_NORM,
+    LLM_TENSOR_DEC_FFN_GATE,
+    LLM_TENSOR_DEC_FFN_DOWN,
+    LLM_TENSOR_DEC_FFN_UP,
+    LLM_TENSOR_DEC_OUTPUT_NORM,
+    LLM_TENSOR_ENC_ATTN_NORM,
+    LLM_TENSOR_ENC_ATTN_Q,
+    LLM_TENSOR_ENC_ATTN_K,
+    LLM_TENSOR_ENC_ATTN_V,
+    LLM_TENSOR_ENC_ATTN_OUT,
+    LLM_TENSOR_ENC_ATTN_REL_B,
+    LLM_TENSOR_ENC_FFN_NORM,
+    LLM_TENSOR_ENC_FFN_GATE,
+    LLM_TENSOR_ENC_FFN_DOWN,
+    LLM_TENSOR_ENC_FFN_UP,
+    LLM_TENSOR_ENC_OUTPUT_NORM,
+    LLM_TENSOR_CLS,
+    LLM_TENSOR_CLS_OUT,
+    LLM_TENSOR_CONV1D,
+    LLM_TENSOR_CONVNEXT_DW,
+    LLM_TENSOR_CONVNEXT_NORM,
+    LLM_TENSOR_CONVNEXT_PW1,
+    LLM_TENSOR_CONVNEXT_PW2,
+    LLM_TENSOR_CONVNEXT_GAMMA,
+    LLM_TENSOR_POS_NET_CONV1,
+    LLM_TENSOR_POS_NET_CONV2,
+    LLM_TENSOR_POS_NET_NORM,
+    LLM_TENSOR_POS_NET_NORM1,
+    LLM_TENSOR_POS_NET_NORM2,
+    LLM_TENSOR_POS_NET_ATTN_NORM,
+    LLM_TENSOR_POS_NET_ATTN_Q,
+    LLM_TENSOR_POS_NET_ATTN_K,
+    LLM_TENSOR_POS_NET_ATTN_V,
+    LLM_TENSOR_POS_NET_ATTN_OUT,
+};
+
+enum llm_tensor_layer {
+    LLM_TENSOR_LAYER_INPUT,
+    LLM_TENSOR_LAYER_REPEATING,
+    LLM_TENSOR_LAYER_OUTPUT,
+};
+
+struct LLM_KV {
+    LLM_KV(llm_arch arch);
+
+    llm_arch arch;
+
+    std::string operator()(llm_kv kv) const;
+};
+
+// helper to handle gguf constants
+// usage:
+//
+//   const auto tn = LLM_TN(LLM_ARCH_LLAMA);
+//
+//   std::string name = tn(LLM_TENSOR_OUTPUT);                  -> "output"
+//   std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias");      -> "token_embd.bias"
+//   std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3);  -> "blk.3.attn_norm.weight"
+//
+struct LLM_TN_IMPL {
+    const llm_arch arch;
+    const llm_tensor tensor;
+    const char * const suffix;
+    const int bid;
+    const int xid;
+
+    std::string str() const;
+
+    operator std::string() const {
+        return str();
+    }
+
+    friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) {
+        return str == tn.str();
+    }
+
+    friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) {
+        return str != tn.str();
+    }
+};
+
+struct LLM_TN {
+    LLM_TN(llm_arch arch) : arch(arch) {}
+
+    llm_arch arch;
+
+    LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const {
+        return { arch, tensor, suffix, bid, xid };
+    }
+
+    LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const {
+        return { arch, tensor, nullptr, bid, xid };
+    }
+};
+
+
+struct llm_tensor_info {
+    llm_tensor_layer layer;
+    lm_ggml_op op;
+};
+
+const char * llm_arch_name(llm_arch arch);
+
+llm_arch llm_arch_from_string(const std::string & name);
+
+const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);