bigdl-core-cpp 2.5.0rc1__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +1673 -278
- bigdl/cpp/convert_hf_to_gguf_update.py +381 -0
- bigdl/cpp/convert_llama_ggml_to_gguf.py +450 -0
- bigdl/cpp/convert_lora_to_gguf.py +461 -0
- bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
- bigdl/cpp/gguf-py/gguf/constants.py +698 -171
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +108 -17
- bigdl/cpp/gguf-py/gguf/lazy.py +3 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +195 -76
- bigdl/cpp/gguf-py/gguf/quants.py +1210 -64
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +262 -43
- bigdl/cpp/gguf-py/gguf/utility.py +2 -2
- bigdl/cpp/gguf-py/gguf/vocab.py +325 -3
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/libc++.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +7 -2
- bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
- bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
- bigdl/cpp/convert.py +0 -1714
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- bigdl_core_cpp-2.5.0rc1.data/scripts/init-ollama.bat +0 -13
- bigdl_core_cpp-2.5.0rc1.dist-info/RECORD +0 -63
- {bigdl_core_cpp-2.5.0rc1.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0rc1.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
@@ -64,20 +64,33 @@ class Keys:
|
|
64
64
|
BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
|
65
65
|
BASE_MODEL_VERSION = "general.base_model.{id}.version"
|
66
66
|
BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
|
67
|
+
BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
|
67
68
|
BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
|
68
69
|
BASE_MODEL_DOI = "general.base_model.{id}.doi"
|
69
70
|
BASE_MODEL_UUID = "general.base_model.{id}.uuid"
|
70
71
|
BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
|
71
72
|
|
73
|
+
# Dataset Source
|
74
|
+
DATASET_COUNT = "general.dataset.count"
|
75
|
+
DATASET_NAME = "general.dataset.{id}.name"
|
76
|
+
DATASET_AUTHOR = "general.dataset.{id}.author"
|
77
|
+
DATASET_VERSION = "general.dataset.{id}.version"
|
78
|
+
DATASET_ORGANIZATION = "general.dataset.{id}.organization"
|
79
|
+
DATASET_DESCRIPTION = "general.dataset.{id}.description"
|
80
|
+
DATASET_URL = "general.dataset.{id}.url" # Model Website/Paper
|
81
|
+
DATASET_DOI = "general.dataset.{id}.doi"
|
82
|
+
DATASET_UUID = "general.dataset.{id}.uuid"
|
83
|
+
DATASET_REPO_URL = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)
|
84
|
+
|
72
85
|
# Array based KV stores
|
73
86
|
TAGS = "general.tags"
|
74
87
|
LANGUAGES = "general.languages"
|
75
|
-
DATASETS = "general.datasets"
|
76
88
|
|
77
89
|
class LLM:
|
78
90
|
VOCAB_SIZE = "{arch}.vocab_size"
|
79
91
|
CONTEXT_LENGTH = "{arch}.context_length"
|
80
92
|
EMBEDDING_LENGTH = "{arch}.embedding_length"
|
93
|
+
FEATURES_LENGTH = "{arch}.features_length"
|
81
94
|
BLOCK_COUNT = "{arch}.block_count"
|
82
95
|
LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
|
83
96
|
FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
|
@@ -89,11 +102,20 @@ class Keys:
|
|
89
102
|
EXPERT_USED_COUNT = "{arch}.expert_used_count"
|
90
103
|
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
|
91
104
|
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
|
105
|
+
EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
|
106
|
+
EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
|
92
107
|
POOLING_TYPE = "{arch}.pooling_type"
|
93
108
|
LOGIT_SCALE = "{arch}.logit_scale"
|
94
109
|
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
|
95
110
|
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
|
96
111
|
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
|
112
|
+
SWIN_NORM = "{arch}.swin_norm"
|
113
|
+
RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
|
114
|
+
TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
|
115
|
+
TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
|
116
|
+
RESIDUAL_SCALE = "{arch}.residual_scale"
|
117
|
+
EMBEDDING_SCALE = "{arch}.embedding_scale"
|
118
|
+
TOKEN_SHIFT_COUNT = "{arch}.token_shift_count"
|
97
119
|
|
98
120
|
class Attention:
|
99
121
|
HEAD_COUNT = "{arch}.attention.head_count"
|
@@ -104,14 +126,18 @@ class Keys:
|
|
104
126
|
VALUE_LENGTH = "{arch}.attention.value_length"
|
105
127
|
LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
|
106
128
|
LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
|
129
|
+
GROUPNORM_EPS = "{arch}.attention.group_norm_epsilon"
|
130
|
+
GROUPNORM_GROUPS = "{arch}.attention.group_norm_groups"
|
107
131
|
CAUSAL = "{arch}.attention.causal"
|
108
132
|
Q_LORA_RANK = "{arch}.attention.q_lora_rank"
|
109
133
|
KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
|
110
134
|
REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
|
111
135
|
SLIDING_WINDOW = "{arch}.attention.sliding_window"
|
136
|
+
SCALE = "{arch}.attention.scale"
|
112
137
|
|
113
138
|
class Rope:
|
114
139
|
DIMENSION_COUNT = "{arch}.rope.dimension_count"
|
140
|
+
DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
|
115
141
|
FREQ_BASE = "{arch}.rope.freq_base"
|
116
142
|
SCALING_TYPE = "{arch}.rope.scaling.type"
|
117
143
|
SCALING_FACTOR = "{arch}.rope.scaling.factor"
|
@@ -130,6 +156,18 @@ class Keys:
|
|
130
156
|
INNER_SIZE = "{arch}.ssm.inner_size"
|
131
157
|
STATE_SIZE = "{arch}.ssm.state_size"
|
132
158
|
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
|
159
|
+
DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
|
160
|
+
|
161
|
+
class WKV:
|
162
|
+
HEAD_SIZE = "{arch}.wkv.head_size"
|
163
|
+
|
164
|
+
class PosNet:
|
165
|
+
EMBEDDING_LENGTH = "{arch}.posnet.embedding_length"
|
166
|
+
BLOCK_COUNT = "{arch}.posnet.block_count"
|
167
|
+
|
168
|
+
class ConvNext:
|
169
|
+
EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
|
170
|
+
BLOCK_COUNT = "{arch}.convnext.block_count"
|
133
171
|
|
134
172
|
class Tokenizer:
|
135
173
|
MODEL = "tokenizer.ggml.model"
|
@@ -141,10 +179,11 @@ class Keys:
|
|
141
179
|
MERGES = "tokenizer.ggml.merges"
|
142
180
|
BOS_ID = "tokenizer.ggml.bos_token_id"
|
143
181
|
EOS_ID = "tokenizer.ggml.eos_token_id"
|
182
|
+
EOT_ID = "tokenizer.ggml.eot_token_id"
|
183
|
+
EOM_ID = "tokenizer.ggml.eom_token_id"
|
144
184
|
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
145
185
|
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
146
186
|
PAD_ID = "tokenizer.ggml.padding_token_id"
|
147
|
-
CLS_ID = "tokenizer.ggml.cls_token_id"
|
148
187
|
MASK_ID = "tokenizer.ggml.mask_token_id"
|
149
188
|
ADD_BOS = "tokenizer.ggml.add_bos_token"
|
150
189
|
ADD_EOS = "tokenizer.ggml.add_eos_token"
|
@@ -157,10 +196,16 @@ class Keys:
|
|
157
196
|
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
|
158
197
|
CHAT_TEMPLATES = "tokenizer.chat_templates"
|
159
198
|
# FIM/Infill special tokens constants
|
199
|
+
FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
|
200
|
+
FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
|
201
|
+
FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
|
202
|
+
FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
|
203
|
+
FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
|
204
|
+
FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
|
205
|
+
# deprecated:
|
160
206
|
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
|
161
207
|
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
|
162
208
|
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
|
163
|
-
EOT_ID = "tokenizer.ggml.eot_token_id"
|
164
209
|
|
165
210
|
class Adapter:
|
166
211
|
TYPE = "adapter.type"
|
@@ -177,46 +222,63 @@ class GGUFType:
|
|
177
222
|
|
178
223
|
|
179
224
|
class MODEL_ARCH(IntEnum):
|
180
|
-
LLAMA
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
225
|
+
LLAMA = auto()
|
226
|
+
DECI = auto()
|
227
|
+
FALCON = auto()
|
228
|
+
BAICHUAN = auto()
|
229
|
+
GROK = auto()
|
230
|
+
GPT2 = auto()
|
231
|
+
GPTJ = auto()
|
232
|
+
GPTNEOX = auto()
|
233
|
+
MPT = auto()
|
234
|
+
STARCODER = auto()
|
235
|
+
REFACT = auto()
|
236
|
+
BERT = auto()
|
237
|
+
NOMIC_BERT = auto()
|
238
|
+
JINA_BERT_V2 = auto()
|
239
|
+
BLOOM = auto()
|
240
|
+
STABLELM = auto()
|
241
|
+
QWEN = auto()
|
242
|
+
QWEN2 = auto()
|
243
|
+
QWEN2MOE = auto()
|
244
|
+
QWEN2VL = auto()
|
245
|
+
PHI2 = auto()
|
246
|
+
PHI3 = auto()
|
247
|
+
PHIMOE = auto()
|
248
|
+
PLAMO = auto()
|
249
|
+
CODESHELL = auto()
|
250
|
+
ORION = auto()
|
251
|
+
INTERNLM2 = auto()
|
252
|
+
MINICPM = auto()
|
253
|
+
MINICPM3 = auto()
|
254
|
+
GEMMA = auto()
|
255
|
+
GEMMA2 = auto()
|
256
|
+
STARCODER2 = auto()
|
257
|
+
RWKV6 = auto()
|
258
|
+
RWKV6QWEN2 = auto()
|
259
|
+
MAMBA = auto()
|
260
|
+
XVERSE = auto()
|
261
|
+
COMMAND_R = auto()
|
262
|
+
COHERE2 = auto()
|
263
|
+
DBRX = auto()
|
264
|
+
OLMO = auto()
|
265
|
+
OLMO2 = auto()
|
266
|
+
OLMOE = auto()
|
267
|
+
OPENELM = auto()
|
268
|
+
ARCTIC = auto()
|
269
|
+
DEEPSEEK = auto()
|
270
|
+
DEEPSEEK2 = auto()
|
271
|
+
CHATGLM = auto()
|
272
|
+
BITNET = auto()
|
273
|
+
T5 = auto()
|
274
|
+
T5ENCODER = auto()
|
275
|
+
JAIS = auto()
|
276
|
+
NEMOTRON = auto()
|
277
|
+
EXAONE = auto()
|
278
|
+
GRANITE = auto()
|
279
|
+
GRANITE_MOE = auto()
|
280
|
+
CHAMELEON = auto()
|
281
|
+
WAVTOKENIZER_DEC = auto()
|
220
282
|
|
221
283
|
|
222
284
|
class MODEL_TENSOR(IntEnum):
|
@@ -255,6 +317,7 @@ class MODEL_TENSOR(IntEnum):
|
|
255
317
|
FFN_GATE_SHEXP = auto()
|
256
318
|
FFN_DOWN_SHEXP = auto()
|
257
319
|
FFN_UP_SHEXP = auto()
|
320
|
+
FFN_EXP_PROBS_B = auto()
|
258
321
|
ATTN_Q_NORM = auto()
|
259
322
|
ATTN_K_NORM = auto()
|
260
323
|
LAYER_OUT_NORM = auto()
|
@@ -265,6 +328,30 @@ class MODEL_TENSOR(IntEnum):
|
|
265
328
|
SSM_A = auto()
|
266
329
|
SSM_D = auto()
|
267
330
|
SSM_OUT = auto()
|
331
|
+
TIME_MIX_W1 = auto()
|
332
|
+
TIME_MIX_W2 = auto()
|
333
|
+
TIME_MIX_LERP_X = auto()
|
334
|
+
TIME_MIX_LERP_K = auto()
|
335
|
+
TIME_MIX_LERP_V = auto()
|
336
|
+
TIME_MIX_LERP_R = auto()
|
337
|
+
TIME_MIX_LERP_G = auto()
|
338
|
+
TIME_MIX_LERP_FUSED = auto()
|
339
|
+
TIME_MIX_LERP_W = auto()
|
340
|
+
TIME_MIX_FIRST = auto()
|
341
|
+
TIME_MIX_DECAY = auto()
|
342
|
+
TIME_MIX_DECAY_W1 = auto()
|
343
|
+
TIME_MIX_DECAY_W2 = auto()
|
344
|
+
TIME_MIX_KEY = auto()
|
345
|
+
TIME_MIX_VALUE = auto()
|
346
|
+
TIME_MIX_RECEPTANCE = auto()
|
347
|
+
TIME_MIX_GATE = auto()
|
348
|
+
TIME_MIX_LN = auto()
|
349
|
+
TIME_MIX_OUTPUT = auto()
|
350
|
+
CHANNEL_MIX_LERP_K = auto()
|
351
|
+
CHANNEL_MIX_LERP_R = auto()
|
352
|
+
CHANNEL_MIX_KEY = auto()
|
353
|
+
CHANNEL_MIX_RECEPTANCE = auto()
|
354
|
+
CHANNEL_MIX_VALUE = auto()
|
268
355
|
ATTN_Q_A = auto()
|
269
356
|
ATTN_Q_B = auto()
|
270
357
|
ATTN_KV_A_MQA = auto()
|
@@ -301,133 +388,211 @@ class MODEL_TENSOR(IntEnum):
|
|
301
388
|
ENC_FFN_DOWN = auto()
|
302
389
|
ENC_FFN_UP = auto()
|
303
390
|
ENC_OUTPUT_NORM = auto()
|
391
|
+
CLS = auto() # classifier
|
392
|
+
CLS_OUT = auto() # classifier output projection
|
393
|
+
CONV1D = auto()
|
394
|
+
CONVNEXT_DW = auto()
|
395
|
+
CONVNEXT_NORM = auto()
|
396
|
+
CONVNEXT_PW1 = auto()
|
397
|
+
CONVNEXT_PW2 = auto()
|
398
|
+
CONVNEXT_GAMMA = auto()
|
399
|
+
POSNET_CONV1 = auto()
|
400
|
+
POSNET_CONV2 = auto()
|
401
|
+
POSNET_NORM = auto()
|
402
|
+
POSNET_NORM1 = auto()
|
403
|
+
POSNET_NORM2 = auto()
|
404
|
+
POSNET_ATTN_NORM = auto()
|
405
|
+
POSNET_ATTN_Q = auto()
|
406
|
+
POSNET_ATTN_K = auto()
|
407
|
+
POSNET_ATTN_V = auto()
|
408
|
+
POSNET_ATTN_OUT = auto()
|
304
409
|
|
305
410
|
|
306
411
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
307
|
-
MODEL_ARCH.LLAMA:
|
308
|
-
MODEL_ARCH.
|
309
|
-
MODEL_ARCH.
|
310
|
-
MODEL_ARCH.
|
311
|
-
MODEL_ARCH.
|
312
|
-
MODEL_ARCH.
|
313
|
-
MODEL_ARCH.
|
314
|
-
MODEL_ARCH.
|
315
|
-
MODEL_ARCH.
|
316
|
-
MODEL_ARCH.
|
317
|
-
MODEL_ARCH.
|
318
|
-
MODEL_ARCH.
|
319
|
-
MODEL_ARCH.
|
320
|
-
MODEL_ARCH.
|
321
|
-
MODEL_ARCH.
|
322
|
-
MODEL_ARCH.
|
323
|
-
MODEL_ARCH.
|
324
|
-
MODEL_ARCH.
|
325
|
-
MODEL_ARCH.
|
326
|
-
MODEL_ARCH.
|
327
|
-
MODEL_ARCH.
|
328
|
-
MODEL_ARCH.
|
329
|
-
MODEL_ARCH.
|
330
|
-
MODEL_ARCH.
|
331
|
-
MODEL_ARCH.
|
332
|
-
MODEL_ARCH.
|
333
|
-
MODEL_ARCH.
|
334
|
-
MODEL_ARCH.
|
335
|
-
MODEL_ARCH.
|
336
|
-
MODEL_ARCH.
|
337
|
-
MODEL_ARCH.
|
338
|
-
MODEL_ARCH.
|
339
|
-
MODEL_ARCH.
|
340
|
-
MODEL_ARCH.
|
341
|
-
MODEL_ARCH.
|
342
|
-
MODEL_ARCH.
|
343
|
-
MODEL_ARCH.
|
344
|
-
MODEL_ARCH.
|
345
|
-
MODEL_ARCH.
|
346
|
-
MODEL_ARCH.
|
412
|
+
MODEL_ARCH.LLAMA: "llama",
|
413
|
+
MODEL_ARCH.DECI: "deci",
|
414
|
+
MODEL_ARCH.FALCON: "falcon",
|
415
|
+
MODEL_ARCH.BAICHUAN: "baichuan",
|
416
|
+
MODEL_ARCH.GROK: "grok",
|
417
|
+
MODEL_ARCH.GPT2: "gpt2",
|
418
|
+
MODEL_ARCH.GPTJ: "gptj",
|
419
|
+
MODEL_ARCH.GPTNEOX: "gptneox",
|
420
|
+
MODEL_ARCH.MPT: "mpt",
|
421
|
+
MODEL_ARCH.STARCODER: "starcoder",
|
422
|
+
MODEL_ARCH.REFACT: "refact",
|
423
|
+
MODEL_ARCH.BERT: "bert",
|
424
|
+
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
|
425
|
+
MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
|
426
|
+
MODEL_ARCH.BLOOM: "bloom",
|
427
|
+
MODEL_ARCH.STABLELM: "stablelm",
|
428
|
+
MODEL_ARCH.QWEN: "qwen",
|
429
|
+
MODEL_ARCH.QWEN2: "qwen2",
|
430
|
+
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
431
|
+
MODEL_ARCH.QWEN2VL: "qwen2vl",
|
432
|
+
MODEL_ARCH.PHI2: "phi2",
|
433
|
+
MODEL_ARCH.PHI3: "phi3",
|
434
|
+
MODEL_ARCH.PHIMOE: "phimoe",
|
435
|
+
MODEL_ARCH.PLAMO: "plamo",
|
436
|
+
MODEL_ARCH.CODESHELL: "codeshell",
|
437
|
+
MODEL_ARCH.ORION: "orion",
|
438
|
+
MODEL_ARCH.INTERNLM2: "internlm2",
|
439
|
+
MODEL_ARCH.MINICPM: "minicpm",
|
440
|
+
MODEL_ARCH.MINICPM3: "minicpm3",
|
441
|
+
MODEL_ARCH.GEMMA: "gemma",
|
442
|
+
MODEL_ARCH.GEMMA2: "gemma2",
|
443
|
+
MODEL_ARCH.STARCODER2: "starcoder2",
|
444
|
+
MODEL_ARCH.RWKV6: "rwkv6",
|
445
|
+
MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2",
|
446
|
+
MODEL_ARCH.MAMBA: "mamba",
|
447
|
+
MODEL_ARCH.XVERSE: "xverse",
|
448
|
+
MODEL_ARCH.COMMAND_R: "command-r",
|
449
|
+
MODEL_ARCH.COHERE2: "cohere2",
|
450
|
+
MODEL_ARCH.DBRX: "dbrx",
|
451
|
+
MODEL_ARCH.OLMO: "olmo",
|
452
|
+
MODEL_ARCH.OLMO2: "olmo2",
|
453
|
+
MODEL_ARCH.OLMOE: "olmoe",
|
454
|
+
MODEL_ARCH.OPENELM: "openelm",
|
455
|
+
MODEL_ARCH.ARCTIC: "arctic",
|
456
|
+
MODEL_ARCH.DEEPSEEK: "deepseek",
|
457
|
+
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
458
|
+
MODEL_ARCH.CHATGLM: "chatglm",
|
459
|
+
MODEL_ARCH.BITNET: "bitnet",
|
460
|
+
MODEL_ARCH.T5: "t5",
|
461
|
+
MODEL_ARCH.T5ENCODER: "t5encoder",
|
462
|
+
MODEL_ARCH.JAIS: "jais",
|
463
|
+
MODEL_ARCH.NEMOTRON: "nemotron",
|
464
|
+
MODEL_ARCH.EXAONE: "exaone",
|
465
|
+
MODEL_ARCH.GRANITE: "granite",
|
466
|
+
MODEL_ARCH.GRANITE_MOE: "granitemoe",
|
467
|
+
MODEL_ARCH.CHAMELEON: "chameleon",
|
468
|
+
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
|
347
469
|
}
|
348
470
|
|
349
471
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
350
|
-
MODEL_TENSOR.TOKEN_EMBD:
|
351
|
-
MODEL_TENSOR.TOKEN_EMBD_NORM:
|
352
|
-
MODEL_TENSOR.TOKEN_TYPES:
|
353
|
-
MODEL_TENSOR.POS_EMBD:
|
354
|
-
MODEL_TENSOR.OUTPUT_NORM:
|
355
|
-
MODEL_TENSOR.OUTPUT:
|
356
|
-
MODEL_TENSOR.ROPE_FREQS:
|
357
|
-
MODEL_TENSOR.ROPE_FACTORS_LONG:
|
358
|
-
MODEL_TENSOR.ROPE_FACTORS_SHORT:
|
359
|
-
MODEL_TENSOR.ATTN_NORM:
|
360
|
-
MODEL_TENSOR.ATTN_NORM_2:
|
361
|
-
MODEL_TENSOR.ATTN_QKV:
|
362
|
-
MODEL_TENSOR.ATTN_Q:
|
363
|
-
MODEL_TENSOR.ATTN_K:
|
364
|
-
MODEL_TENSOR.ATTN_V:
|
365
|
-
MODEL_TENSOR.ATTN_OUT:
|
366
|
-
MODEL_TENSOR.ATTN_ROT_EMBD:
|
367
|
-
MODEL_TENSOR.ATTN_Q_NORM:
|
368
|
-
MODEL_TENSOR.ATTN_K_NORM:
|
369
|
-
MODEL_TENSOR.ATTN_OUT_NORM:
|
370
|
-
MODEL_TENSOR.ATTN_POST_NORM:
|
371
|
-
MODEL_TENSOR.FFN_GATE_INP:
|
372
|
-
MODEL_TENSOR.FFN_GATE_INP_SHEXP:
|
373
|
-
MODEL_TENSOR.FFN_NORM:
|
374
|
-
MODEL_TENSOR.FFN_PRE_NORM:
|
375
|
-
MODEL_TENSOR.FFN_POST_NORM:
|
376
|
-
MODEL_TENSOR.FFN_GATE:
|
377
|
-
MODEL_TENSOR.FFN_DOWN:
|
378
|
-
MODEL_TENSOR.FFN_UP:
|
379
|
-
MODEL_TENSOR.FFN_GATE_SHEXP:
|
380
|
-
MODEL_TENSOR.FFN_DOWN_SHEXP:
|
381
|
-
MODEL_TENSOR.FFN_UP_SHEXP:
|
382
|
-
MODEL_TENSOR.FFN_ACT:
|
383
|
-
MODEL_TENSOR.FFN_NORM_EXP:
|
384
|
-
MODEL_TENSOR.FFN_GATE_EXP:
|
385
|
-
MODEL_TENSOR.FFN_DOWN_EXP:
|
386
|
-
MODEL_TENSOR.FFN_UP_EXP:
|
387
|
-
MODEL_TENSOR.
|
388
|
-
MODEL_TENSOR.
|
389
|
-
MODEL_TENSOR.
|
390
|
-
MODEL_TENSOR.
|
391
|
-
MODEL_TENSOR.
|
392
|
-
MODEL_TENSOR.
|
393
|
-
MODEL_TENSOR.
|
394
|
-
MODEL_TENSOR.
|
395
|
-
MODEL_TENSOR.
|
396
|
-
MODEL_TENSOR.
|
397
|
-
MODEL_TENSOR.
|
398
|
-
MODEL_TENSOR.
|
399
|
-
MODEL_TENSOR.
|
400
|
-
MODEL_TENSOR.
|
401
|
-
MODEL_TENSOR.
|
402
|
-
MODEL_TENSOR.
|
403
|
-
MODEL_TENSOR.
|
404
|
-
MODEL_TENSOR.
|
405
|
-
MODEL_TENSOR.
|
406
|
-
MODEL_TENSOR.
|
407
|
-
MODEL_TENSOR.
|
408
|
-
MODEL_TENSOR.
|
409
|
-
MODEL_TENSOR.
|
410
|
-
MODEL_TENSOR.
|
411
|
-
MODEL_TENSOR.
|
412
|
-
MODEL_TENSOR.
|
413
|
-
MODEL_TENSOR.
|
414
|
-
MODEL_TENSOR.
|
415
|
-
MODEL_TENSOR.
|
416
|
-
MODEL_TENSOR.
|
417
|
-
MODEL_TENSOR.
|
418
|
-
MODEL_TENSOR.
|
419
|
-
MODEL_TENSOR.
|
420
|
-
MODEL_TENSOR.
|
421
|
-
MODEL_TENSOR.
|
422
|
-
MODEL_TENSOR.
|
423
|
-
MODEL_TENSOR.
|
424
|
-
MODEL_TENSOR.
|
425
|
-
MODEL_TENSOR.
|
426
|
-
MODEL_TENSOR.
|
427
|
-
MODEL_TENSOR.
|
428
|
-
MODEL_TENSOR.
|
429
|
-
MODEL_TENSOR.
|
430
|
-
MODEL_TENSOR.
|
472
|
+
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
473
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
|
474
|
+
MODEL_TENSOR.TOKEN_TYPES: "token_types",
|
475
|
+
MODEL_TENSOR.POS_EMBD: "position_embd",
|
476
|
+
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
477
|
+
MODEL_TENSOR.OUTPUT: "output",
|
478
|
+
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
|
479
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
|
480
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
|
481
|
+
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
482
|
+
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
|
483
|
+
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
484
|
+
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
|
485
|
+
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
|
486
|
+
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
|
487
|
+
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
488
|
+
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
|
489
|
+
MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
|
490
|
+
MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
|
491
|
+
MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
|
492
|
+
MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
|
493
|
+
MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
|
494
|
+
MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
|
495
|
+
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
496
|
+
MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
|
497
|
+
MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
|
498
|
+
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
499
|
+
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
500
|
+
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
501
|
+
MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
|
502
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
|
503
|
+
MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
|
504
|
+
MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
|
505
|
+
MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
|
506
|
+
MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
|
507
|
+
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
|
508
|
+
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
|
509
|
+
MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
|
510
|
+
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
511
|
+
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
512
|
+
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
513
|
+
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
514
|
+
MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
|
515
|
+
MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
|
516
|
+
MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
|
517
|
+
MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
|
518
|
+
MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
|
519
|
+
MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
|
520
|
+
MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
|
521
|
+
MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
|
522
|
+
MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
|
523
|
+
MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
|
524
|
+
MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
|
525
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED: "blk.{bid}.time_mix_lerp_fused",
|
526
|
+
MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
|
527
|
+
MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
|
528
|
+
MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
|
529
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W1: "blk.{bid}.time_mix_decay_w1",
|
530
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W2: "blk.{bid}.time_mix_decay_w2",
|
531
|
+
MODEL_TENSOR.TIME_MIX_KEY: "blk.{bid}.time_mix_key",
|
532
|
+
MODEL_TENSOR.TIME_MIX_VALUE: "blk.{bid}.time_mix_value",
|
533
|
+
MODEL_TENSOR.TIME_MIX_RECEPTANCE: "blk.{bid}.time_mix_receptance",
|
534
|
+
MODEL_TENSOR.TIME_MIX_GATE: "blk.{bid}.time_mix_gate",
|
535
|
+
MODEL_TENSOR.TIME_MIX_LN: "blk.{bid}.time_mix_ln",
|
536
|
+
MODEL_TENSOR.TIME_MIX_OUTPUT: "blk.{bid}.time_mix_output",
|
537
|
+
MODEL_TENSOR.CHANNEL_MIX_LERP_K: "blk.{bid}.channel_mix_lerp_k",
|
538
|
+
MODEL_TENSOR.CHANNEL_MIX_LERP_R: "blk.{bid}.channel_mix_lerp_r",
|
539
|
+
MODEL_TENSOR.CHANNEL_MIX_KEY: "blk.{bid}.channel_mix_key",
|
540
|
+
MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
|
541
|
+
MODEL_TENSOR.CHANNEL_MIX_VALUE: "blk.{bid}.channel_mix_value",
|
542
|
+
MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
|
543
|
+
MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
|
544
|
+
MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
|
545
|
+
MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
|
546
|
+
MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
|
547
|
+
MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
|
548
|
+
MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
|
549
|
+
MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
|
550
|
+
MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
|
551
|
+
MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
|
552
|
+
MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
|
553
|
+
MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
|
554
|
+
MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
|
555
|
+
MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
|
556
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
|
557
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
|
558
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
|
559
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
|
560
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
|
561
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
|
562
|
+
MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
|
563
|
+
MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
|
564
|
+
MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
|
565
|
+
MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
|
566
|
+
MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
|
567
|
+
MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
|
568
|
+
MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
|
569
|
+
MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
|
570
|
+
MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
|
571
|
+
MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
|
572
|
+
MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
|
573
|
+
MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
|
574
|
+
MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
|
575
|
+
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
|
576
|
+
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
|
577
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
578
|
+
MODEL_TENSOR.CLS: "cls",
|
579
|
+
MODEL_TENSOR.CLS_OUT: "cls.output",
|
580
|
+
MODEL_TENSOR.CONV1D: "conv1d",
|
581
|
+
MODEL_TENSOR.CONVNEXT_DW: "convnext.{bid}.dw",
|
582
|
+
MODEL_TENSOR.CONVNEXT_NORM: "convnext.{bid}.norm",
|
583
|
+
MODEL_TENSOR.CONVNEXT_PW1: "convnext.{bid}.pw1",
|
584
|
+
MODEL_TENSOR.CONVNEXT_PW2: "convnext.{bid}.pw2",
|
585
|
+
MODEL_TENSOR.CONVNEXT_GAMMA: "convnext.{bid}.gamma",
|
586
|
+
MODEL_TENSOR.POSNET_CONV1: "posnet.{bid}.conv1",
|
587
|
+
MODEL_TENSOR.POSNET_CONV2: "posnet.{bid}.conv2",
|
588
|
+
MODEL_TENSOR.POSNET_NORM: "posnet.{bid}.norm",
|
589
|
+
MODEL_TENSOR.POSNET_NORM1: "posnet.{bid}.norm1",
|
590
|
+
MODEL_TENSOR.POSNET_NORM2: "posnet.{bid}.norm2",
|
591
|
+
MODEL_TENSOR.POSNET_ATTN_NORM: "posnet.{bid}.attn_norm",
|
592
|
+
MODEL_TENSOR.POSNET_ATTN_Q: "posnet.{bid}.attn_q",
|
593
|
+
MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k",
|
594
|
+
MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v",
|
595
|
+
MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output",
|
431
596
|
}
|
432
597
|
|
433
598
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
@@ -451,6 +616,26 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
451
616
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
452
617
|
MODEL_TENSOR.FFN_UP_EXP,
|
453
618
|
],
|
619
|
+
MODEL_ARCH.DECI: [
|
620
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
621
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
622
|
+
MODEL_TENSOR.OUTPUT,
|
623
|
+
MODEL_TENSOR.ROPE_FREQS,
|
624
|
+
MODEL_TENSOR.ATTN_NORM,
|
625
|
+
MODEL_TENSOR.ATTN_Q,
|
626
|
+
MODEL_TENSOR.ATTN_K,
|
627
|
+
MODEL_TENSOR.ATTN_V,
|
628
|
+
MODEL_TENSOR.ATTN_OUT,
|
629
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
630
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
631
|
+
MODEL_TENSOR.FFN_NORM,
|
632
|
+
MODEL_TENSOR.FFN_GATE,
|
633
|
+
MODEL_TENSOR.FFN_DOWN,
|
634
|
+
MODEL_TENSOR.FFN_UP,
|
635
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
636
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
637
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
638
|
+
],
|
454
639
|
MODEL_ARCH.GROK: [
|
455
640
|
MODEL_TENSOR.TOKEN_EMBD,
|
456
641
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -537,6 +722,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
537
722
|
MODEL_TENSOR.FFN_DOWN,
|
538
723
|
MODEL_TENSOR.FFN_UP,
|
539
724
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
725
|
+
MODEL_TENSOR.CLS,
|
726
|
+
MODEL_TENSOR.CLS_OUT,
|
540
727
|
],
|
541
728
|
MODEL_ARCH.NOMIC_BERT: [
|
542
729
|
MODEL_TENSOR.TOKEN_EMBD,
|
@@ -568,6 +755,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
568
755
|
MODEL_TENSOR.FFN_GATE,
|
569
756
|
MODEL_TENSOR.FFN_DOWN,
|
570
757
|
MODEL_TENSOR.LAYER_OUT_NORM,
|
758
|
+
MODEL_TENSOR.CLS,
|
571
759
|
],
|
572
760
|
MODEL_ARCH.MPT: [
|
573
761
|
MODEL_TENSOR.TOKEN_EMBD,
|
@@ -654,6 +842,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
654
842
|
MODEL_TENSOR.FFN_UP,
|
655
843
|
],
|
656
844
|
MODEL_ARCH.QWEN2: [
|
845
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
846
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
847
|
+
MODEL_TENSOR.OUTPUT,
|
848
|
+
MODEL_TENSOR.ROPE_FREQS,
|
849
|
+
MODEL_TENSOR.ATTN_NORM,
|
850
|
+
MODEL_TENSOR.ATTN_Q,
|
851
|
+
MODEL_TENSOR.ATTN_K,
|
852
|
+
MODEL_TENSOR.ATTN_V,
|
853
|
+
MODEL_TENSOR.ATTN_OUT,
|
854
|
+
MODEL_TENSOR.FFN_NORM,
|
855
|
+
MODEL_TENSOR.FFN_GATE,
|
856
|
+
MODEL_TENSOR.FFN_DOWN,
|
857
|
+
MODEL_TENSOR.FFN_UP,
|
858
|
+
],
|
859
|
+
MODEL_ARCH.QWEN2VL: [
|
657
860
|
MODEL_TENSOR.TOKEN_EMBD,
|
658
861
|
MODEL_TENSOR.OUTPUT_NORM,
|
659
862
|
MODEL_TENSOR.OUTPUT,
|
@@ -731,6 +934,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
731
934
|
MODEL_TENSOR.TOKEN_EMBD,
|
732
935
|
MODEL_TENSOR.OUTPUT_NORM,
|
733
936
|
MODEL_TENSOR.OUTPUT,
|
937
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
938
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
734
939
|
MODEL_TENSOR.ATTN_NORM,
|
735
940
|
MODEL_TENSOR.ATTN_QKV,
|
736
941
|
MODEL_TENSOR.ATTN_Q,
|
@@ -741,6 +946,24 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
741
946
|
MODEL_TENSOR.FFN_DOWN,
|
742
947
|
MODEL_TENSOR.FFN_UP,
|
743
948
|
],
|
949
|
+
MODEL_ARCH.PHIMOE: [
|
950
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
951
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
952
|
+
MODEL_TENSOR.OUTPUT,
|
953
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
954
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
955
|
+
MODEL_TENSOR.ATTN_NORM,
|
956
|
+
MODEL_TENSOR.ATTN_QKV,
|
957
|
+
MODEL_TENSOR.ATTN_Q,
|
958
|
+
MODEL_TENSOR.ATTN_K,
|
959
|
+
MODEL_TENSOR.ATTN_V,
|
960
|
+
MODEL_TENSOR.ATTN_OUT,
|
961
|
+
MODEL_TENSOR.FFN_NORM,
|
962
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
963
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
964
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
965
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
966
|
+
],
|
744
967
|
MODEL_ARCH.CODESHELL: [
|
745
968
|
MODEL_TENSOR.TOKEN_EMBD,
|
746
969
|
MODEL_TENSOR.POS_EMBD,
|
@@ -790,6 +1013,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
790
1013
|
MODEL_TENSOR.OUTPUT,
|
791
1014
|
MODEL_TENSOR.OUTPUT_NORM,
|
792
1015
|
MODEL_TENSOR.ROPE_FREQS,
|
1016
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
1017
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
793
1018
|
MODEL_TENSOR.ATTN_NORM,
|
794
1019
|
MODEL_TENSOR.ATTN_Q,
|
795
1020
|
MODEL_TENSOR.ATTN_K,
|
@@ -805,6 +1030,25 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
805
1030
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
806
1031
|
MODEL_TENSOR.FFN_UP_EXP,
|
807
1032
|
],
|
1033
|
+
MODEL_ARCH.MINICPM3: [
|
1034
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1035
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1036
|
+
MODEL_TENSOR.OUTPUT,
|
1037
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG,
|
1038
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT,
|
1039
|
+
MODEL_TENSOR.ATTN_NORM,
|
1040
|
+
MODEL_TENSOR.ATTN_Q_A,
|
1041
|
+
MODEL_TENSOR.ATTN_Q_B,
|
1042
|
+
MODEL_TENSOR.ATTN_KV_A_MQA,
|
1043
|
+
MODEL_TENSOR.ATTN_KV_B,
|
1044
|
+
MODEL_TENSOR.ATTN_Q_A_NORM,
|
1045
|
+
MODEL_TENSOR.ATTN_KV_A_NORM,
|
1046
|
+
MODEL_TENSOR.ATTN_OUT,
|
1047
|
+
MODEL_TENSOR.FFN_NORM,
|
1048
|
+
MODEL_TENSOR.FFN_GATE,
|
1049
|
+
MODEL_TENSOR.FFN_DOWN,
|
1050
|
+
MODEL_TENSOR.FFN_UP,
|
1051
|
+
],
|
808
1052
|
MODEL_ARCH.GEMMA: [
|
809
1053
|
MODEL_TENSOR.TOKEN_EMBD,
|
810
1054
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -848,6 +1092,67 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
848
1092
|
MODEL_TENSOR.FFN_DOWN,
|
849
1093
|
MODEL_TENSOR.FFN_UP,
|
850
1094
|
],
|
1095
|
+
MODEL_ARCH.RWKV6: [
|
1096
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1097
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
1098
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1099
|
+
MODEL_TENSOR.OUTPUT,
|
1100
|
+
MODEL_TENSOR.ATTN_NORM,
|
1101
|
+
MODEL_TENSOR.ATTN_NORM_2,
|
1102
|
+
MODEL_TENSOR.TIME_MIX_W1,
|
1103
|
+
MODEL_TENSOR.TIME_MIX_W2,
|
1104
|
+
MODEL_TENSOR.TIME_MIX_LERP_X,
|
1105
|
+
MODEL_TENSOR.TIME_MIX_LERP_K,
|
1106
|
+
MODEL_TENSOR.TIME_MIX_LERP_V,
|
1107
|
+
MODEL_TENSOR.TIME_MIX_LERP_R,
|
1108
|
+
MODEL_TENSOR.TIME_MIX_LERP_G,
|
1109
|
+
MODEL_TENSOR.TIME_MIX_LERP_W,
|
1110
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
1111
|
+
MODEL_TENSOR.TIME_MIX_FIRST,
|
1112
|
+
MODEL_TENSOR.TIME_MIX_DECAY,
|
1113
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W1,
|
1114
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W2,
|
1115
|
+
MODEL_TENSOR.TIME_MIX_KEY,
|
1116
|
+
MODEL_TENSOR.TIME_MIX_VALUE,
|
1117
|
+
MODEL_TENSOR.TIME_MIX_RECEPTANCE,
|
1118
|
+
MODEL_TENSOR.TIME_MIX_GATE,
|
1119
|
+
MODEL_TENSOR.TIME_MIX_LN,
|
1120
|
+
MODEL_TENSOR.TIME_MIX_OUTPUT,
|
1121
|
+
MODEL_TENSOR.CHANNEL_MIX_LERP_K,
|
1122
|
+
MODEL_TENSOR.CHANNEL_MIX_LERP_R,
|
1123
|
+
MODEL_TENSOR.CHANNEL_MIX_KEY,
|
1124
|
+
MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
|
1125
|
+
MODEL_TENSOR.CHANNEL_MIX_VALUE,
|
1126
|
+
],
|
1127
|
+
MODEL_ARCH.RWKV6QWEN2: [
|
1128
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1129
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1130
|
+
MODEL_TENSOR.OUTPUT,
|
1131
|
+
MODEL_TENSOR.ATTN_NORM,
|
1132
|
+
MODEL_TENSOR.TIME_MIX_W1,
|
1133
|
+
MODEL_TENSOR.TIME_MIX_W2,
|
1134
|
+
MODEL_TENSOR.TIME_MIX_LERP_X,
|
1135
|
+
MODEL_TENSOR.TIME_MIX_LERP_K,
|
1136
|
+
MODEL_TENSOR.TIME_MIX_LERP_V,
|
1137
|
+
MODEL_TENSOR.TIME_MIX_LERP_R,
|
1138
|
+
MODEL_TENSOR.TIME_MIX_LERP_G,
|
1139
|
+
MODEL_TENSOR.TIME_MIX_LERP_W,
|
1140
|
+
MODEL_TENSOR.TIME_MIX_LERP_FUSED,
|
1141
|
+
MODEL_TENSOR.TIME_MIX_FIRST,
|
1142
|
+
MODEL_TENSOR.TIME_MIX_DECAY,
|
1143
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W1,
|
1144
|
+
MODEL_TENSOR.TIME_MIX_DECAY_W2,
|
1145
|
+
MODEL_TENSOR.TIME_MIX_KEY,
|
1146
|
+
MODEL_TENSOR.TIME_MIX_VALUE,
|
1147
|
+
MODEL_TENSOR.TIME_MIX_RECEPTANCE,
|
1148
|
+
MODEL_TENSOR.TIME_MIX_GATE,
|
1149
|
+
MODEL_TENSOR.TIME_MIX_LN,
|
1150
|
+
MODEL_TENSOR.TIME_MIX_OUTPUT,
|
1151
|
+
MODEL_TENSOR.FFN_NORM,
|
1152
|
+
MODEL_TENSOR.FFN_GATE,
|
1153
|
+
MODEL_TENSOR.FFN_DOWN,
|
1154
|
+
MODEL_TENSOR.FFN_UP,
|
1155
|
+
],
|
851
1156
|
MODEL_ARCH.MAMBA: [
|
852
1157
|
MODEL_TENSOR.TOKEN_EMBD,
|
853
1158
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -891,6 +1196,18 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
891
1196
|
MODEL_TENSOR.ATTN_K_NORM,
|
892
1197
|
MODEL_TENSOR.ATTN_Q_NORM,
|
893
1198
|
],
|
1199
|
+
MODEL_ARCH.COHERE2: [
|
1200
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1201
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1202
|
+
MODEL_TENSOR.ATTN_NORM,
|
1203
|
+
MODEL_TENSOR.ATTN_Q,
|
1204
|
+
MODEL_TENSOR.ATTN_K,
|
1205
|
+
MODEL_TENSOR.ATTN_V,
|
1206
|
+
MODEL_TENSOR.ATTN_OUT,
|
1207
|
+
MODEL_TENSOR.FFN_GATE,
|
1208
|
+
MODEL_TENSOR.FFN_DOWN,
|
1209
|
+
MODEL_TENSOR.FFN_UP,
|
1210
|
+
],
|
894
1211
|
MODEL_ARCH.DBRX: [
|
895
1212
|
MODEL_TENSOR.TOKEN_EMBD,
|
896
1213
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -915,6 +1232,39 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
915
1232
|
MODEL_TENSOR.FFN_DOWN,
|
916
1233
|
MODEL_TENSOR.FFN_UP,
|
917
1234
|
],
|
1235
|
+
MODEL_ARCH.OLMO2: [
|
1236
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1237
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1238
|
+
MODEL_TENSOR.OUTPUT,
|
1239
|
+
MODEL_TENSOR.ATTN_Q,
|
1240
|
+
MODEL_TENSOR.ATTN_K,
|
1241
|
+
MODEL_TENSOR.ATTN_V,
|
1242
|
+
MODEL_TENSOR.ATTN_OUT,
|
1243
|
+
MODEL_TENSOR.ATTN_POST_NORM,
|
1244
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
1245
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
1246
|
+
MODEL_TENSOR.FFN_POST_NORM,
|
1247
|
+
MODEL_TENSOR.FFN_GATE,
|
1248
|
+
MODEL_TENSOR.FFN_DOWN,
|
1249
|
+
MODEL_TENSOR.FFN_UP,
|
1250
|
+
],
|
1251
|
+
MODEL_ARCH.OLMOE: [
|
1252
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1253
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1254
|
+
MODEL_TENSOR.OUTPUT,
|
1255
|
+
MODEL_TENSOR.ATTN_OUT,
|
1256
|
+
MODEL_TENSOR.ATTN_Q,
|
1257
|
+
MODEL_TENSOR.ATTN_K,
|
1258
|
+
MODEL_TENSOR.ATTN_V,
|
1259
|
+
MODEL_TENSOR.ATTN_NORM,
|
1260
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
1261
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
1262
|
+
MODEL_TENSOR.FFN_NORM,
|
1263
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
1264
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
1265
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
1266
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
1267
|
+
],
|
918
1268
|
MODEL_ARCH.OPENELM: [
|
919
1269
|
MODEL_TENSOR.TOKEN_EMBD,
|
920
1270
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -949,6 +1299,29 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
949
1299
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
950
1300
|
MODEL_TENSOR.FFN_UP_EXP,
|
951
1301
|
],
|
1302
|
+
MODEL_ARCH.DEEPSEEK: [
|
1303
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1304
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1305
|
+
MODEL_TENSOR.OUTPUT,
|
1306
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1307
|
+
MODEL_TENSOR.ATTN_NORM,
|
1308
|
+
MODEL_TENSOR.ATTN_Q,
|
1309
|
+
MODEL_TENSOR.ATTN_K,
|
1310
|
+
MODEL_TENSOR.ATTN_V,
|
1311
|
+
MODEL_TENSOR.ATTN_OUT,
|
1312
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1313
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
1314
|
+
MODEL_TENSOR.FFN_NORM,
|
1315
|
+
MODEL_TENSOR.FFN_GATE,
|
1316
|
+
MODEL_TENSOR.FFN_DOWN,
|
1317
|
+
MODEL_TENSOR.FFN_UP,
|
1318
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
1319
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
1320
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
1321
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
1322
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
1323
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
1324
|
+
],
|
952
1325
|
MODEL_ARCH.DEEPSEEK2: [
|
953
1326
|
MODEL_TENSOR.TOKEN_EMBD,
|
954
1327
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -975,6 +1348,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
975
1348
|
MODEL_TENSOR.FFN_GATE_SHEXP,
|
976
1349
|
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
977
1350
|
MODEL_TENSOR.FFN_UP_SHEXP,
|
1351
|
+
MODEL_TENSOR.FFN_EXP_PROBS_B,
|
978
1352
|
],
|
979
1353
|
MODEL_ARCH.CHATGLM : [
|
980
1354
|
MODEL_TENSOR.TOKEN_EMBD,
|
@@ -983,6 +1357,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
983
1357
|
MODEL_TENSOR.OUTPUT,
|
984
1358
|
MODEL_TENSOR.ATTN_NORM,
|
985
1359
|
MODEL_TENSOR.ATTN_QKV,
|
1360
|
+
MODEL_TENSOR.ATTN_Q,
|
1361
|
+
MODEL_TENSOR.ATTN_K,
|
1362
|
+
MODEL_TENSOR.ATTN_V,
|
986
1363
|
MODEL_TENSOR.ATTN_OUT,
|
987
1364
|
MODEL_TENSOR.FFN_NORM,
|
988
1365
|
MODEL_TENSOR.FFN_DOWN,
|
@@ -1035,6 +1412,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1035
1412
|
MODEL_TENSOR.ENC_FFN_UP,
|
1036
1413
|
MODEL_TENSOR.ENC_OUTPUT_NORM,
|
1037
1414
|
],
|
1415
|
+
MODEL_ARCH.T5ENCODER: [
|
1416
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1417
|
+
MODEL_TENSOR.OUTPUT,
|
1418
|
+
MODEL_TENSOR.ENC_ATTN_NORM,
|
1419
|
+
MODEL_TENSOR.ENC_ATTN_Q,
|
1420
|
+
MODEL_TENSOR.ENC_ATTN_K,
|
1421
|
+
MODEL_TENSOR.ENC_ATTN_V,
|
1422
|
+
MODEL_TENSOR.ENC_ATTN_OUT,
|
1423
|
+
MODEL_TENSOR.ENC_ATTN_REL_B,
|
1424
|
+
MODEL_TENSOR.ENC_FFN_NORM,
|
1425
|
+
MODEL_TENSOR.ENC_FFN_GATE,
|
1426
|
+
MODEL_TENSOR.ENC_FFN_DOWN,
|
1427
|
+
MODEL_TENSOR.ENC_FFN_UP,
|
1428
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM,
|
1429
|
+
],
|
1038
1430
|
MODEL_ARCH.JAIS: [
|
1039
1431
|
MODEL_TENSOR.TOKEN_EMBD,
|
1040
1432
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -1047,6 +1439,104 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1047
1439
|
MODEL_TENSOR.FFN_GATE,
|
1048
1440
|
MODEL_TENSOR.FFN_UP,
|
1049
1441
|
],
|
1442
|
+
MODEL_ARCH.NEMOTRON: [
|
1443
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1444
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1445
|
+
MODEL_TENSOR.OUTPUT,
|
1446
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1447
|
+
MODEL_TENSOR.ATTN_NORM,
|
1448
|
+
MODEL_TENSOR.ATTN_Q,
|
1449
|
+
MODEL_TENSOR.ATTN_K,
|
1450
|
+
MODEL_TENSOR.ATTN_V,
|
1451
|
+
MODEL_TENSOR.ATTN_OUT,
|
1452
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1453
|
+
MODEL_TENSOR.FFN_NORM,
|
1454
|
+
MODEL_TENSOR.FFN_DOWN,
|
1455
|
+
MODEL_TENSOR.FFN_UP,
|
1456
|
+
],
|
1457
|
+
MODEL_ARCH.EXAONE: [
|
1458
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1459
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1460
|
+
MODEL_TENSOR.OUTPUT,
|
1461
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1462
|
+
MODEL_TENSOR.ATTN_NORM,
|
1463
|
+
MODEL_TENSOR.ATTN_Q,
|
1464
|
+
MODEL_TENSOR.ATTN_K,
|
1465
|
+
MODEL_TENSOR.ATTN_V,
|
1466
|
+
MODEL_TENSOR.ATTN_OUT,
|
1467
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1468
|
+
MODEL_TENSOR.FFN_NORM,
|
1469
|
+
MODEL_TENSOR.FFN_GATE,
|
1470
|
+
MODEL_TENSOR.FFN_DOWN,
|
1471
|
+
MODEL_TENSOR.FFN_UP,
|
1472
|
+
],
|
1473
|
+
MODEL_ARCH.GRANITE: [
|
1474
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1475
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1476
|
+
MODEL_TENSOR.OUTPUT,
|
1477
|
+
MODEL_TENSOR.ATTN_NORM,
|
1478
|
+
MODEL_TENSOR.ATTN_Q,
|
1479
|
+
MODEL_TENSOR.ATTN_K,
|
1480
|
+
MODEL_TENSOR.ATTN_V,
|
1481
|
+
MODEL_TENSOR.ATTN_OUT,
|
1482
|
+
MODEL_TENSOR.FFN_NORM,
|
1483
|
+
MODEL_TENSOR.FFN_GATE,
|
1484
|
+
MODEL_TENSOR.FFN_DOWN,
|
1485
|
+
MODEL_TENSOR.FFN_UP,
|
1486
|
+
],
|
1487
|
+
MODEL_ARCH.GRANITE_MOE: [
|
1488
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1489
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1490
|
+
MODEL_TENSOR.OUTPUT,
|
1491
|
+
MODEL_TENSOR.ATTN_NORM,
|
1492
|
+
MODEL_TENSOR.ATTN_Q,
|
1493
|
+
MODEL_TENSOR.ATTN_K,
|
1494
|
+
MODEL_TENSOR.ATTN_V,
|
1495
|
+
MODEL_TENSOR.ATTN_OUT,
|
1496
|
+
MODEL_TENSOR.FFN_NORM,
|
1497
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
1498
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
1499
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
1500
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
1501
|
+
],
|
1502
|
+
MODEL_ARCH.CHAMELEON: [
|
1503
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1504
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1505
|
+
MODEL_TENSOR.OUTPUT,
|
1506
|
+
MODEL_TENSOR.ATTN_NORM,
|
1507
|
+
MODEL_TENSOR.ATTN_Q,
|
1508
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
1509
|
+
MODEL_TENSOR.ATTN_K,
|
1510
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
1511
|
+
MODEL_TENSOR.ATTN_V,
|
1512
|
+
MODEL_TENSOR.ATTN_OUT,
|
1513
|
+
MODEL_TENSOR.FFN_NORM,
|
1514
|
+
MODEL_TENSOR.FFN_GATE,
|
1515
|
+
MODEL_TENSOR.FFN_DOWN,
|
1516
|
+
MODEL_TENSOR.FFN_UP,
|
1517
|
+
],
|
1518
|
+
MODEL_ARCH.WAVTOKENIZER_DEC: [
|
1519
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1520
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
1521
|
+
MODEL_TENSOR.CONV1D,
|
1522
|
+
MODEL_TENSOR.CONVNEXT_DW,
|
1523
|
+
MODEL_TENSOR.CONVNEXT_NORM,
|
1524
|
+
MODEL_TENSOR.CONVNEXT_PW1,
|
1525
|
+
MODEL_TENSOR.CONVNEXT_PW2,
|
1526
|
+
MODEL_TENSOR.CONVNEXT_GAMMA,
|
1527
|
+
MODEL_TENSOR.OUTPUT,
|
1528
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1529
|
+
MODEL_TENSOR.POSNET_CONV1,
|
1530
|
+
MODEL_TENSOR.POSNET_CONV2,
|
1531
|
+
MODEL_TENSOR.POSNET_NORM,
|
1532
|
+
MODEL_TENSOR.POSNET_NORM1,
|
1533
|
+
MODEL_TENSOR.POSNET_NORM2,
|
1534
|
+
MODEL_TENSOR.POSNET_ATTN_NORM,
|
1535
|
+
MODEL_TENSOR.POSNET_ATTN_Q,
|
1536
|
+
MODEL_TENSOR.POSNET_ATTN_K,
|
1537
|
+
MODEL_TENSOR.POSNET_ATTN_V,
|
1538
|
+
MODEL_TENSOR.POSNET_ATTN_OUT,
|
1539
|
+
],
|
1050
1540
|
# TODO
|
1051
1541
|
}
|
1052
1542
|
|
@@ -1056,6 +1546,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1056
1546
|
MODEL_TENSOR.ROPE_FREQS,
|
1057
1547
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1058
1548
|
],
|
1549
|
+
MODEL_ARCH.DECI: [
|
1550
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1551
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1552
|
+
],
|
1059
1553
|
MODEL_ARCH.BAICHUAN: [
|
1060
1554
|
MODEL_TENSOR.ROPE_FREQS,
|
1061
1555
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
@@ -1080,6 +1574,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1080
1574
|
MODEL_TENSOR.ROPE_FREQS,
|
1081
1575
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1082
1576
|
],
|
1577
|
+
MODEL_ARCH.DEEPSEEK: [
|
1578
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1579
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1580
|
+
],
|
1083
1581
|
MODEL_ARCH.DEEPSEEK2: [
|
1084
1582
|
MODEL_TENSOR.ROPE_FREQS,
|
1085
1583
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
@@ -1087,6 +1585,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
1087
1585
|
MODEL_ARCH.CHATGLM: [
|
1088
1586
|
MODEL_TENSOR.ROPE_FREQS,
|
1089
1587
|
],
|
1588
|
+
MODEL_ARCH.NEMOTRON: [
|
1589
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1590
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1591
|
+
],
|
1090
1592
|
}
|
1091
1593
|
|
1092
1594
|
#
|
@@ -1104,9 +1606,10 @@ class TokenType(IntEnum):
|
|
1104
1606
|
|
1105
1607
|
|
1106
1608
|
class RopeScalingType(Enum):
|
1107
|
-
NONE
|
1108
|
-
LINEAR
|
1109
|
-
YARN
|
1609
|
+
NONE = 'none'
|
1610
|
+
LINEAR = 'linear'
|
1611
|
+
YARN = 'yarn'
|
1612
|
+
LONGROPE = 'longrope'
|
1110
1613
|
|
1111
1614
|
|
1112
1615
|
class PoolingType(IntEnum):
|
@@ -1145,6 +1648,13 @@ class GGMLQuantizationType(IntEnum):
|
|
1145
1648
|
F64 = 28
|
1146
1649
|
IQ1_M = 29
|
1147
1650
|
BF16 = 30
|
1651
|
+
TQ1_0 = 34
|
1652
|
+
TQ2_0 = 35
|
1653
|
+
|
1654
|
+
|
1655
|
+
class ExpertGatingFuncType(IntEnum):
|
1656
|
+
SOFTMAX = 1
|
1657
|
+
SIGMOID = 2
|
1148
1658
|
|
1149
1659
|
|
1150
1660
|
# TODO: add GGMLFileType from ggml_ftype in ggml.h
|
@@ -1157,7 +1667,7 @@ class LlamaFileType(IntEnum):
|
|
1157
1667
|
MOSTLY_F16 = 1 # except 1d tensors
|
1158
1668
|
MOSTLY_Q4_0 = 2 # except 1d tensors
|
1159
1669
|
MOSTLY_Q4_1 = 3 # except 1d tensors
|
1160
|
-
MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
|
1670
|
+
# MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
|
1161
1671
|
# MOSTLY_Q4_2 = 5 # support has been removed
|
1162
1672
|
# MOSTLY_Q4_3 = 6 # support has been removed
|
1163
1673
|
MOSTLY_Q8_0 = 7 # except 1d tensors
|
@@ -1186,6 +1696,11 @@ class LlamaFileType(IntEnum):
|
|
1186
1696
|
MOSTLY_IQ4_XS = 30 # except 1d tensors
|
1187
1697
|
MOSTLY_IQ1_M = 31 # except 1d tensors
|
1188
1698
|
MOSTLY_BF16 = 32 # except 1d tensors
|
1699
|
+
# MOSTLY_Q4_0_4_4 = 33 # removed from gguf files, use Q4_0 and runtime repack
|
1700
|
+
# MOSTLY_Q4_0_4_8 = 34 # removed from gguf files, use Q4_0 and runtime repack
|
1701
|
+
# MOSTLY_Q4_0_8_8 = 35 # removed from gguf files, use Q4_0 and runtime repack
|
1702
|
+
MOSTLY_TQ1_0 = 36 # except 1d tensors
|
1703
|
+
MOSTLY_TQ2_0 = 37 # except 1d tensors
|
1189
1704
|
|
1190
1705
|
GUESSED = 1024 # not specified in the model file
|
1191
1706
|
|
@@ -1259,6 +1774,8 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
|
|
1259
1774
|
GGMLQuantizationType.F64: (1, 8),
|
1260
1775
|
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
|
1261
1776
|
GGMLQuantizationType.BF16: (1, 2),
|
1777
|
+
GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
|
1778
|
+
GGMLQuantizationType.TQ2_0: (256, 2 + 64),
|
1262
1779
|
}
|
1263
1780
|
|
1264
1781
|
|
@@ -1306,6 +1823,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
|
|
1306
1823
|
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
|
1307
1824
|
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
|
1308
1825
|
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
|
1826
|
+
KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS
|
1309
1827
|
|
1310
1828
|
# tokenization
|
1311
1829
|
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
|
@@ -1316,14 +1834,23 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
|
|
1316
1834
|
KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
|
1317
1835
|
KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
|
1318
1836
|
KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
|
1837
|
+
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
|
1838
|
+
KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
|
1319
1839
|
KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
|
1320
1840
|
KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
|
1321
1841
|
KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
|
1322
|
-
KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
|
1323
1842
|
KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
|
1324
1843
|
KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
|
1325
1844
|
KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
|
1326
|
-
|
1845
|
+
|
1846
|
+
KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
|
1847
|
+
KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
|
1848
|
+
KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
|
1849
|
+
KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
|
1850
|
+
KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
|
1851
|
+
KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
|
1852
|
+
|
1853
|
+
# deprecated
|
1854
|
+
KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
|
1327
1855
|
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
|
1328
1856
|
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
|
1329
|
-
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
|