bigdl-core-cpp 2.1.0b20240820.post1__py3-none-win_amd64.whl → 2.2.0b20250217.post0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +908 -140
- bigdl/cpp/convert_hf_to_gguf_update.py +376 -0
- bigdl/cpp/convert_llama_ggml_to_gguf.py +450 -0
- bigdl/cpp/convert_lora_to_gguf.py +433 -0
- bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
- bigdl/cpp/gguf-py/gguf/constants.py +414 -89
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +77 -14
- bigdl/cpp/gguf-py/gguf/lazy.py +3 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +195 -76
- bigdl/cpp/gguf-py/gguf/quants.py +1210 -64
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +156 -34
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +325 -3
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/libc++.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.1.0b20240820.post1.data → bigdl_core_cpp-2.2.0b20250217.post0.data}/scripts/init-llama-cpp.bat +7 -2
- bigdl_core_cpp-2.2.0b20250217.post0.data/scripts/init-ollama.bat +16 -0
- {bigdl_core_cpp-2.1.0b20240820.post1.dist-info → bigdl_core_cpp-2.2.0b20250217.post0.dist-info}/METADATA +9 -5
- bigdl_core_cpp-2.2.0b20250217.post0.dist-info/RECORD +56 -0
- {bigdl_core_cpp-2.1.0b20240820.post1.dist-info → bigdl_core_cpp-2.2.0b20250217.post0.dist-info}/WHEEL +1 -1
- bigdl/cpp/convert.py +0 -1714
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- bigdl_core_cpp-2.1.0b20240820.post1.data/scripts/init-ollama.bat +0 -13
- bigdl_core_cpp-2.1.0b20240820.post1.dist-info/RECORD +0 -63
- {bigdl_core_cpp-2.1.0b20240820.post1.data → bigdl_core_cpp-2.2.0b20250217.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.1.0b20240820.post1.dist-info → bigdl_core_cpp-2.2.0b20250217.post0.dist-info}/top_level.txt +0 -0
Diff of bigdl/cpp/gguf-py/gguf/constants.py (+414 -89):

```diff
@@ -64,15 +64,27 @@ class Keys:
         BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
         BASE_MODEL_VERSION = "general.base_model.{id}.version"
         BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
+        BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
         BASE_MODEL_URL = "general.base_model.{id}.url"  # Model Website/Paper
         BASE_MODEL_DOI = "general.base_model.{id}.doi"
         BASE_MODEL_UUID = "general.base_model.{id}.uuid"
         BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url"  # Model Source Repository (git/svn/etc...)

+        # Dataset Source
+        DATASET_COUNT = "general.dataset.count"
+        DATASET_NAME = "general.dataset.{id}.name"
+        DATASET_AUTHOR = "general.dataset.{id}.author"
+        DATASET_VERSION = "general.dataset.{id}.version"
+        DATASET_ORGANIZATION = "general.dataset.{id}.organization"
+        DATASET_DESCRIPTION = "general.dataset.{id}.description"
+        DATASET_URL = "general.dataset.{id}.url"  # Model Website/Paper
+        DATASET_DOI = "general.dataset.{id}.doi"
+        DATASET_UUID = "general.dataset.{id}.uuid"
+        DATASET_REPO_URL = "general.dataset.{id}.repo_url"  # Model Source Repository (git/svn/etc...)
+
         # Array based KV stores
         TAGS = "general.tags"
         LANGUAGES = "general.languages"
-        DATASETS = "general.datasets"

     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"
@@ -94,6 +106,12 @@ class Keys:
         DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
         ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
         FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
+        SWIN_NORM = "{arch}.swin_norm"
+        RESCALE_EVERY_N_LAYERS = "{arch}.rescale_every_n_layers"
+        TIME_MIX_EXTRA_DIM = "{arch}.time_mix_extra_dim"
+        TIME_DECAY_EXTRA_DIM = "{arch}.time_decay_extra_dim"
+        RESIDUAL_SCALE = "{arch}.residual_scale"
+        EMBEDDING_SCALE = "{arch}.embedding_scale"

     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -109,9 +127,11 @@ class Keys:
         KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
         SLIDING_WINDOW = "{arch}.attention.sliding_window"
+        SCALE = "{arch}.attention.scale"

     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE = "{arch}.rope.freq_base"
         SCALING_TYPE = "{arch}.rope.scaling.type"
         SCALING_FACTOR = "{arch}.rope.scaling.factor"
@@ -130,6 +150,10 @@ class Keys:
         INNER_SIZE = "{arch}.ssm.inner_size"
         STATE_SIZE = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
+
+    class WKV:
+        HEAD_SIZE = "{arch}.wkv.head_size"

     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
@@ -141,6 +165,8 @@ class Keys:
         MERGES = "tokenizer.ggml.merges"
         BOS_ID = "tokenizer.ggml.bos_token_id"
         EOS_ID = "tokenizer.ggml.eos_token_id"
+        EOT_ID = "tokenizer.ggml.eot_token_id"
+        EOM_ID = "tokenizer.ggml.eom_token_id"
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"
@@ -157,10 +183,16 @@ class Keys:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # deprecated:
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
-        EOT_ID = "tokenizer.ggml.eot_token_id"

     class Adapter:
         TYPE = "adapter.type"
```
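The new general.dataset.{id}.* keys replace the flat general.datasets array with one record per dataset, mirroring the existing general.base_model.{id}.* layout. A minimal sketch of emitting them through the bundled gguf-py follows (not taken from the package docs; it assumes the bundled gguf package is importable as `gguf` and that these constants sit in Keys.General alongside the base_model keys; the file name and values are invented):

```python
import gguf
from gguf.constants import Keys

# GGUFWriter's low-level add_* helpers take the raw key string, so the {id}
# placeholder in the key template is filled with str.format() per dataset.
writer = gguf.GGUFWriter("example.gguf", arch="llama")
writer.add_uint32(Keys.General.DATASET_COUNT, 1)
writer.add_string(Keys.General.DATASET_NAME.format(id=0), "tiny-demo-corpus")
writer.add_string(Keys.General.DATASET_URL.format(id=0), "https://example.com/data")
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
```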
```diff
@@ -195,6 +227,7 @@ class MODEL_ARCH(IntEnum):
     QWEN = auto()
     QWEN2 = auto()
     QWEN2MOE = auto()
+    QWEN2VL = auto()
     PHI2 = auto()
     PHI3 = auto()
     PLAMO = auto()
@@ -202,21 +235,31 @@ class MODEL_ARCH(IntEnum):
     ORION = auto()
     INTERNLM2 = auto()
     MINICPM = auto()
+    MINICPM3 = auto()
     GEMMA = auto()
     GEMMA2 = auto()
     STARCODER2 = auto()
+    RWKV6 = auto()
     MAMBA = auto()
     XVERSE = auto()
     COMMAND_R = auto()
     DBRX = auto()
     OLMO = auto()
+    OLMO2 = auto()
+    OLMOE = auto()
     OPENELM = auto()
     ARCTIC = auto()
     DEEPSEEK2 = auto()
     CHATGLM = auto()
     BITNET = auto()
     T5 = auto()
+    T5ENCODER = auto()
     JAIS = auto()
+    NEMOTRON = auto()
+    EXAONE = auto()
+    GRANITE = auto()
+    GRANITE_MOE = auto()
+    CHAMELEON = auto()


 class MODEL_TENSOR(IntEnum):
@@ -265,6 +308,29 @@ class MODEL_TENSOR(IntEnum):
     SSM_A = auto()
     SSM_D = auto()
     SSM_OUT = auto()
+    TIME_MIX_W1 = auto()
+    TIME_MIX_W2 = auto()
+    TIME_MIX_LERP_X = auto()
+    TIME_MIX_LERP_K = auto()
+    TIME_MIX_LERP_V = auto()
+    TIME_MIX_LERP_R = auto()
+    TIME_MIX_LERP_G = auto()
+    TIME_MIX_LERP_W = auto()
+    TIME_MIX_FIRST = auto()
+    TIME_MIX_DECAY = auto()
+    TIME_MIX_DECAY_W1 = auto()
+    TIME_MIX_DECAY_W2 = auto()
+    TIME_MIX_KEY = auto()
+    TIME_MIX_VALUE = auto()
+    TIME_MIX_RECEPTANCE = auto()
+    TIME_MIX_GATE = auto()
+    TIME_MIX_LN = auto()
+    TIME_MIX_OUTPUT = auto()
+    CHANNEL_MIX_LERP_K = auto()
+    CHANNEL_MIX_LERP_R = auto()
+    CHANNEL_MIX_KEY = auto()
+    CHANNEL_MIX_RECEPTANCE = auto()
+    CHANNEL_MIX_VALUE = auto()
     ATTN_Q_A = auto()
     ATTN_Q_B = auto()
     ATTN_KV_A_MQA = auto()
@@ -301,6 +367,8 @@ class MODEL_TENSOR(IntEnum):
     ENC_FFN_DOWN = auto()
     ENC_FFN_UP = auto()
     ENC_OUTPUT_NORM = auto()
+    CLS = auto()  # classifier
+    CLS_OUT = auto()  # classifier output projection


 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
```
```diff
@@ -322,6 +390,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.QWEN: "qwen",
     MODEL_ARCH.QWEN2: "qwen2",
     MODEL_ARCH.QWEN2MOE: "qwen2moe",
+    MODEL_ARCH.QWEN2VL: "qwen2vl",
     MODEL_ARCH.PHI2: "phi2",
     MODEL_ARCH.PHI3: "phi3",
     MODEL_ARCH.PLAMO: "plamo",
@@ -329,105 +398,140 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.ORION: "orion",
     MODEL_ARCH.INTERNLM2: "internlm2",
     MODEL_ARCH.MINICPM: "minicpm",
+    MODEL_ARCH.MINICPM3: "minicpm3",
     MODEL_ARCH.GEMMA: "gemma",
     MODEL_ARCH.GEMMA2: "gemma2",
     MODEL_ARCH.STARCODER2: "starcoder2",
+    MODEL_ARCH.RWKV6: "rwkv6",
     MODEL_ARCH.MAMBA: "mamba",
     MODEL_ARCH.XVERSE: "xverse",
     MODEL_ARCH.COMMAND_R: "command-r",
     MODEL_ARCH.DBRX: "dbrx",
     MODEL_ARCH.OLMO: "olmo",
+    MODEL_ARCH.OLMO2: "olmo2",
+    MODEL_ARCH.OLMOE: "olmoe",
     MODEL_ARCH.OPENELM: "openelm",
     MODEL_ARCH.ARCTIC: "arctic",
     MODEL_ARCH.DEEPSEEK2: "deepseek2",
     MODEL_ARCH.CHATGLM: "chatglm",
     MODEL_ARCH.BITNET: "bitnet",
     MODEL_ARCH.T5: "t5",
+    MODEL_ARCH.T5ENCODER: "t5encoder",
     MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.NEMOTRON: "nemotron",
+    MODEL_ARCH.EXAONE: "exaone",
+    MODEL_ARCH.GRANITE: "granite",
+    MODEL_ARCH.GRANITE_MOE: "granitemoe",
+    MODEL_ARCH.CHAMELEON: "chameleon",
 }

 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.TOKEN_EMBD: "token_embd",
     MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
     MODEL_TENSOR.TOKEN_TYPES: "token_types",
     MODEL_TENSOR.POS_EMBD: "position_embd",
     MODEL_TENSOR.OUTPUT_NORM: "output_norm",
     MODEL_TENSOR.OUTPUT: "output",
     MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
     MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
     MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
     MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
     MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
     MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
     MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
     MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
     MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
     MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
     MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
     MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
     MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
     MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
     MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
     MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
     MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
     MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
     MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
     MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
     MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
     MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
     MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
     MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
     MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
     MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
     MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
     MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
     MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
     MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
     MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
     MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
     MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
     MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
+    MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
+    MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
+    MODEL_TENSOR.TIME_MIX_LERP_X: "blk.{bid}.time_mix_lerp_x",
+    MODEL_TENSOR.TIME_MIX_LERP_K: "blk.{bid}.time_mix_lerp_k",
+    MODEL_TENSOR.TIME_MIX_LERP_V: "blk.{bid}.time_mix_lerp_v",
+    MODEL_TENSOR.TIME_MIX_LERP_R: "blk.{bid}.time_mix_lerp_r",
+    MODEL_TENSOR.TIME_MIX_LERP_G: "blk.{bid}.time_mix_lerp_g",
+    MODEL_TENSOR.TIME_MIX_LERP_W: "blk.{bid}.time_mix_lerp_w",
+    MODEL_TENSOR.TIME_MIX_FIRST: "blk.{bid}.time_mix_first",
+    MODEL_TENSOR.TIME_MIX_DECAY: "blk.{bid}.time_mix_decay",
+    MODEL_TENSOR.TIME_MIX_DECAY_W1: "blk.{bid}.time_mix_decay_w1",
+    MODEL_TENSOR.TIME_MIX_DECAY_W2: "blk.{bid}.time_mix_decay_w2",
+    MODEL_TENSOR.TIME_MIX_KEY: "blk.{bid}.time_mix_key",
+    MODEL_TENSOR.TIME_MIX_VALUE: "blk.{bid}.time_mix_value",
+    MODEL_TENSOR.TIME_MIX_RECEPTANCE: "blk.{bid}.time_mix_receptance",
+    MODEL_TENSOR.TIME_MIX_GATE: "blk.{bid}.time_mix_gate",
+    MODEL_TENSOR.TIME_MIX_LN: "blk.{bid}.time_mix_ln",
+    MODEL_TENSOR.TIME_MIX_OUTPUT: "blk.{bid}.time_mix_output",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_K: "blk.{bid}.channel_mix_lerp_k",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_R: "blk.{bid}.channel_mix_lerp_r",
+    MODEL_TENSOR.CHANNEL_MIX_KEY: "blk.{bid}.channel_mix_key",
+    MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
+    MODEL_TENSOR.CHANNEL_MIX_VALUE: "blk.{bid}.channel_mix_value",
     MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
     MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
     MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
     MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
     MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
     MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
     MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
     MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
     MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
     MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
     MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
     MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
     MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
     MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
     MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
     MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
     MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
     MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
     MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
     MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
     MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
     MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
     MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
     MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
     MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
     MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
     MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
     MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
     MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
     MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
     MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
     MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
     MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
     MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
     MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
     MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
+    MODEL_TENSOR.CLS: "cls",
+    MODEL_TENSOR.CLS_OUT: "cls.output",
 }

 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
```
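Every per-block entry in TENSOR_NAMES is a template whose {bid} placeholder is the block (layer) index, resolved with str.format() when tensors are mapped, so the new RWKV6 names land under the usual blk.N prefix. A quick illustration:

```python
from gguf.constants import MODEL_TENSOR, TENSOR_NAMES

print(TENSOR_NAMES[MODEL_TENSOR.TIME_MIX_KEY].format(bid=3))  # blk.3.time_mix_key
print(TENSOR_NAMES[MODEL_TENSOR.CLS])                         # cls (global, no block index)
```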
```diff
@@ -537,6 +641,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
+        MODEL_TENSOR.CLS_OUT,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -568,6 +674,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -654,6 +761,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2VL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
@@ -731,6 +853,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,
@@ -790,6 +914,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
@@ -805,6 +931,25 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.MINICPM3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_A,
+        MODEL_TENSOR.ATTN_Q_B,
+        MODEL_TENSOR.ATTN_KV_A_MQA,
+        MODEL_TENSOR.ATTN_KV_B,
+        MODEL_TENSOR.ATTN_Q_A_NORM,
+        MODEL_TENSOR.ATTN_KV_A_NORM,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.GEMMA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -848,6 +993,37 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.RWKV6: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_K,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_R,
+        MODEL_TENSOR.CHANNEL_MIX_KEY,
+        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
+        MODEL_TENSOR.CHANNEL_MIX_VALUE,
+    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -915,6 +1091,39 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.OLMO2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.OLMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+    ],
     MODEL_ARCH.OPENELM: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1035,6 +1244,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ENC_FFN_UP,
         MODEL_TENSOR.ENC_OUTPUT_NORM,
     ],
+    MODEL_ARCH.T5ENCODER: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ENC_ATTN_NORM,
+        MODEL_TENSOR.ENC_ATTN_Q,
+        MODEL_TENSOR.ENC_ATTN_K,
+        MODEL_TENSOR.ENC_ATTN_V,
+        MODEL_TENSOR.ENC_ATTN_OUT,
+        MODEL_TENSOR.ENC_ATTN_REL_B,
+        MODEL_TENSOR.ENC_FFN_NORM,
+        MODEL_TENSOR.ENC_FFN_GATE,
+        MODEL_TENSOR.ENC_FFN_DOWN,
+        MODEL_TENSOR.ENC_FFN_UP,
+        MODEL_TENSOR.ENC_OUTPUT_NORM,
+    ],
     MODEL_ARCH.JAIS: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1047,6 +1271,82 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.NEMOTRON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.EXAONE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.CHAMELEON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     # TODO
 }
```
```diff
@@ -1087,6 +1387,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     MODEL_ARCH.CHATGLM: [
         MODEL_TENSOR.ROPE_FREQS,
     ],
+    MODEL_ARCH.NEMOTRON: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
 }

 #
```
```diff
@@ -1104,9 +1408,10 @@ class TokenType(IntEnum):


 class RopeScalingType(Enum):
     NONE = 'none'
     LINEAR = 'linear'
     YARN = 'yarn'
+    LONGROPE = 'longrope'


 class PoolingType(IntEnum):
```
```diff
@@ -1145,6 +1450,8 @@ class GGMLQuantizationType(IntEnum):
     F64 = 28
     IQ1_M = 29
     BF16 = 30
+    TQ1_0 = 34
+    TQ2_0 = 35


 # TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -1157,7 +1464,7 @@ class LlamaFileType(IntEnum):
     MOSTLY_F16 = 1  # except 1d tensors
     MOSTLY_Q4_0 = 2  # except 1d tensors
     MOSTLY_Q4_1 = 3  # except 1d tensors
-    MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
+    # MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
     # MOSTLY_Q4_2 = 5  # support has been removed
     # MOSTLY_Q4_3 = 6  # support has been removed
     MOSTLY_Q8_0 = 7  # except 1d tensors
@@ -1186,6 +1493,11 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS = 30  # except 1d tensors
     MOSTLY_IQ1_M = 31  # except 1d tensors
     MOSTLY_BF16 = 32  # except 1d tensors
+    # MOSTLY_Q4_0_4_4 = 33  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35  # removed from gguf files, use Q4_0 and runtime repack
+    MOSTLY_TQ1_0 = 36  # except 1d tensors
+    MOSTLY_TQ2_0 = 37  # except 1d tensors

     GUESSED = 1024  # not specified in the model file
```
|
|
1259
1571
|
GGMLQuantizationType.F64: (1, 8),
|
1260
1572
|
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
|
1261
1573
|
GGMLQuantizationType.BF16: (1, 2),
|
1574
|
+
GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
|
1575
|
+
GGMLQuantizationType.TQ2_0: (256, 2 + 64),
|
1262
1576
|
}
|
1263
1577
|
|
1264
1578
|
|
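Each GGML_QUANT_SIZES entry is (elements per block, bytes per block), so the new ternary types cost 54 bytes per 256-element block for TQ1_0 (54 * 8 / 256 ≈ 1.69 bits per weight) and 66 bytes for TQ2_0 (≈ 2.06 bits per weight). A small sketch of the resulting storage cost:

```python
from gguf.constants import GGMLQuantizationType, GGML_QUANT_SIZES

def quantized_nbytes(n_elements: int, qtype: GGMLQuantizationType) -> int:
    block_size, type_size = GGML_QUANT_SIZES[qtype]  # elements/block, bytes/block
    assert n_elements % block_size == 0, "row length must be a multiple of the block size"
    return n_elements // block_size * type_size

# A 4096x4096 weight matrix: 16777216 / 256 blocks * 54 bytes = 3538944 bytes
print(quantized_nbytes(4096 * 4096, GGMLQuantizationType.TQ1_0))
```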
```diff
@@ -1306,6 +1620,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS

 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
@@ -1316,6 +1631,8 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
@@ -1323,7 +1640,15 @@ KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
 KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
```