bigdl-core-cpp 2.5.0b20240725__py3-none-win_amd64.whl → 2.5.0b20240727__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert-hf-to-gguf.py +1106 -320
- bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
- bigdl/cpp/gguf-py/gguf/constants.py +442 -173
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +472 -156
- bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +195 -23
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.5.0b20240727.dist-info/RECORD +61 -0
- bigdl_core_cpp-2.5.0b20240725.dist-info/RECORD +0 -61
- {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/top_level.txt +0 -0
@@ -19,31 +19,81 @@ GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
|
|
19
19
|
|
20
20
|
class Keys:
|
21
21
|
class General:
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
22
|
+
TYPE = "general.type"
|
23
|
+
ARCHITECTURE = "general.architecture"
|
24
|
+
QUANTIZATION_VERSION = "general.quantization_version"
|
25
|
+
ALIGNMENT = "general.alignment"
|
26
|
+
FILE_TYPE = "general.file_type"
|
27
|
+
|
28
|
+
# Authorship Metadata
|
29
|
+
NAME = "general.name"
|
30
|
+
AUTHOR = "general.author"
|
31
|
+
VERSION = "general.version"
|
32
|
+
ORGANIZATION = "general.organization"
|
33
|
+
|
34
|
+
FINETUNE = "general.finetune"
|
35
|
+
BASENAME = "general.basename"
|
36
|
+
|
37
|
+
DESCRIPTION = "general.description"
|
38
|
+
QUANTIZED_BY = "general.quantized_by"
|
39
|
+
|
40
|
+
SIZE_LABEL = "general.size_label"
|
41
|
+
|
42
|
+
# Licensing details
|
43
|
+
LICENSE = "general.license"
|
44
|
+
LICENSE_NAME = "general.license.name"
|
45
|
+
LICENSE_LINK = "general.license.link"
|
46
|
+
|
47
|
+
# Typically represents the converted GGUF repo (Unless native)
|
48
|
+
URL = "general.url" # Model Website/Paper
|
49
|
+
DOI = "general.doi"
|
50
|
+
UUID = "general.uuid"
|
51
|
+
REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)
|
52
|
+
|
53
|
+
# Model Source during conversion
|
54
|
+
SOURCE_URL = "general.source.url" # Model Website/Paper
|
55
|
+
SOURCE_DOI = "general.source.doi"
|
56
|
+
SOURCE_UUID = "general.source.uuid"
|
57
|
+
SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)
|
58
|
+
|
59
|
+
# Base Model Source. There can be more than one source if it's a merged
|
60
|
+
# model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
|
61
|
+
# tracing linage of models as it is finetuned or merged over time.
|
62
|
+
BASE_MODEL_COUNT = "general.base_model.count"
|
63
|
+
BASE_MODEL_NAME = "general.base_model.{id}.name"
|
64
|
+
BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
|
65
|
+
BASE_MODEL_VERSION = "general.base_model.{id}.version"
|
66
|
+
BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
|
67
|
+
BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
|
68
|
+
BASE_MODEL_DOI = "general.base_model.{id}.doi"
|
69
|
+
BASE_MODEL_UUID = "general.base_model.{id}.uuid"
|
70
|
+
BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
|
71
|
+
|
72
|
+
# Array based KV stores
|
73
|
+
TAGS = "general.tags"
|
74
|
+
LANGUAGES = "general.languages"
|
75
|
+
DATASETS = "general.datasets"
|
34
76
|
|
35
77
|
class LLM:
|
36
|
-
VOCAB_SIZE
|
37
|
-
CONTEXT_LENGTH
|
38
|
-
EMBEDDING_LENGTH
|
39
|
-
BLOCK_COUNT
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
78
|
+
VOCAB_SIZE = "{arch}.vocab_size"
|
79
|
+
CONTEXT_LENGTH = "{arch}.context_length"
|
80
|
+
EMBEDDING_LENGTH = "{arch}.embedding_length"
|
81
|
+
BLOCK_COUNT = "{arch}.block_count"
|
82
|
+
LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
|
83
|
+
FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
|
84
|
+
EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
|
85
|
+
EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
|
86
|
+
USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
|
87
|
+
TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
|
88
|
+
EXPERT_COUNT = "{arch}.expert_count"
|
89
|
+
EXPERT_USED_COUNT = "{arch}.expert_used_count"
|
90
|
+
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
|
91
|
+
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
|
92
|
+
POOLING_TYPE = "{arch}.pooling_type"
|
93
|
+
LOGIT_SCALE = "{arch}.logit_scale"
|
94
|
+
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
|
95
|
+
ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
|
96
|
+
FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
|
47
97
|
|
48
98
|
class Attention:
|
49
99
|
HEAD_COUNT = "{arch}.attention.head_count"
|
@@ -68,6 +118,12 @@ class Keys:
|
|
68
118
|
SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"
|
69
119
|
SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
|
70
120
|
SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
|
121
|
+
SCALING_YARN_LOG_MUL = "{arch}.rope.scaling.yarn_log_multiplier"
|
122
|
+
|
123
|
+
class Split:
|
124
|
+
LLM_KV_SPLIT_NO = "split.no"
|
125
|
+
LLM_KV_SPLIT_COUNT = "split.count"
|
126
|
+
LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"
|
71
127
|
|
72
128
|
class SSM:
|
73
129
|
CONV_KERNEL = "{arch}.ssm.conv_kernel"
|
@@ -76,123 +132,175 @@ class Keys:
|
|
76
132
|
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
|
77
133
|
|
78
134
|
class Tokenizer:
|
79
|
-
MODEL
|
80
|
-
PRE
|
81
|
-
LIST
|
82
|
-
TOKEN_TYPE
|
83
|
-
TOKEN_TYPE_COUNT
|
84
|
-
SCORES
|
85
|
-
MERGES
|
86
|
-
BOS_ID
|
87
|
-
EOS_ID
|
88
|
-
UNK_ID
|
89
|
-
SEP_ID
|
90
|
-
PAD_ID
|
91
|
-
CLS_ID
|
92
|
-
MASK_ID
|
93
|
-
ADD_BOS
|
94
|
-
ADD_EOS
|
95
|
-
ADD_PREFIX
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
135
|
+
MODEL = "tokenizer.ggml.model"
|
136
|
+
PRE = "tokenizer.ggml.pre"
|
137
|
+
LIST = "tokenizer.ggml.tokens"
|
138
|
+
TOKEN_TYPE = "tokenizer.ggml.token_type"
|
139
|
+
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
|
140
|
+
SCORES = "tokenizer.ggml.scores"
|
141
|
+
MERGES = "tokenizer.ggml.merges"
|
142
|
+
BOS_ID = "tokenizer.ggml.bos_token_id"
|
143
|
+
EOS_ID = "tokenizer.ggml.eos_token_id"
|
144
|
+
UNK_ID = "tokenizer.ggml.unknown_token_id"
|
145
|
+
SEP_ID = "tokenizer.ggml.seperator_token_id"
|
146
|
+
PAD_ID = "tokenizer.ggml.padding_token_id"
|
147
|
+
CLS_ID = "tokenizer.ggml.cls_token_id"
|
148
|
+
MASK_ID = "tokenizer.ggml.mask_token_id"
|
149
|
+
ADD_BOS = "tokenizer.ggml.add_bos_token"
|
150
|
+
ADD_EOS = "tokenizer.ggml.add_eos_token"
|
151
|
+
ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
|
152
|
+
REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
|
153
|
+
PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
|
154
|
+
HF_JSON = "tokenizer.huggingface.json"
|
155
|
+
RWKV = "tokenizer.rwkv.world"
|
156
|
+
CHAT_TEMPLATE = "tokenizer.chat_template"
|
157
|
+
CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
|
158
|
+
CHAT_TEMPLATES = "tokenizer.chat_templates"
|
101
159
|
# FIM/Infill special tokens constants
|
102
|
-
PREFIX_ID
|
103
|
-
SUFFIX_ID
|
104
|
-
MIDDLE_ID
|
105
|
-
EOT_ID
|
160
|
+
PREFIX_ID = "tokenizer.ggml.prefix_token_id"
|
161
|
+
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
|
162
|
+
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
|
163
|
+
EOT_ID = "tokenizer.ggml.eot_token_id"
|
106
164
|
|
165
|
+
class Adapter:
|
166
|
+
TYPE = "adapter.type"
|
167
|
+
LORA_ALPHA = "adapter.lora.alpha"
|
107
168
|
|
108
169
|
#
|
109
170
|
# recommended mapping of model tensor names for storage in gguf
|
110
171
|
#
|
111
172
|
|
112
173
|
|
174
|
+
class GGUFType:
|
175
|
+
MODEL = "model"
|
176
|
+
ADAPTER = "adapter"
|
177
|
+
|
178
|
+
|
113
179
|
class MODEL_ARCH(IntEnum):
|
114
|
-
LLAMA
|
115
|
-
FALCON
|
116
|
-
BAICHUAN
|
117
|
-
GROK
|
118
|
-
GPT2
|
119
|
-
GPTJ
|
120
|
-
GPTNEOX
|
121
|
-
MPT
|
122
|
-
STARCODER
|
123
|
-
REFACT
|
124
|
-
BERT
|
125
|
-
NOMIC_BERT
|
180
|
+
LLAMA = auto()
|
181
|
+
FALCON = auto()
|
182
|
+
BAICHUAN = auto()
|
183
|
+
GROK = auto()
|
184
|
+
GPT2 = auto()
|
185
|
+
GPTJ = auto()
|
186
|
+
GPTNEOX = auto()
|
187
|
+
MPT = auto()
|
188
|
+
STARCODER = auto()
|
189
|
+
REFACT = auto()
|
190
|
+
BERT = auto()
|
191
|
+
NOMIC_BERT = auto()
|
126
192
|
JINA_BERT_V2 = auto()
|
127
|
-
BLOOM
|
128
|
-
STABLELM
|
129
|
-
QWEN
|
130
|
-
QWEN2
|
131
|
-
QWEN2MOE
|
132
|
-
PHI2
|
133
|
-
PHI3
|
134
|
-
PLAMO
|
135
|
-
CODESHELL
|
136
|
-
ORION
|
137
|
-
INTERNLM2
|
138
|
-
MINICPM
|
139
|
-
GEMMA
|
140
|
-
GEMMA2
|
141
|
-
STARCODER2
|
142
|
-
MAMBA
|
143
|
-
XVERSE
|
144
|
-
COMMAND_R
|
145
|
-
DBRX
|
146
|
-
OLMO
|
147
|
-
|
193
|
+
BLOOM = auto()
|
194
|
+
STABLELM = auto()
|
195
|
+
QWEN = auto()
|
196
|
+
QWEN2 = auto()
|
197
|
+
QWEN2MOE = auto()
|
198
|
+
PHI2 = auto()
|
199
|
+
PHI3 = auto()
|
200
|
+
PLAMO = auto()
|
201
|
+
CODESHELL = auto()
|
202
|
+
ORION = auto()
|
203
|
+
INTERNLM2 = auto()
|
204
|
+
MINICPM = auto()
|
205
|
+
GEMMA = auto()
|
206
|
+
GEMMA2 = auto()
|
207
|
+
STARCODER2 = auto()
|
208
|
+
MAMBA = auto()
|
209
|
+
XVERSE = auto()
|
210
|
+
COMMAND_R = auto()
|
211
|
+
DBRX = auto()
|
212
|
+
OLMO = auto()
|
213
|
+
OPENELM = auto()
|
214
|
+
ARCTIC = auto()
|
215
|
+
DEEPSEEK2 = auto()
|
216
|
+
CHATGLM = auto()
|
217
|
+
BITNET = auto()
|
218
|
+
T5 = auto()
|
219
|
+
JAIS = auto()
|
148
220
|
|
149
221
|
|
150
222
|
class MODEL_TENSOR(IntEnum):
|
151
|
-
TOKEN_EMBD
|
152
|
-
TOKEN_EMBD_NORM
|
153
|
-
TOKEN_TYPES
|
154
|
-
POS_EMBD
|
155
|
-
OUTPUT
|
156
|
-
OUTPUT_NORM
|
157
|
-
ROPE_FREQS
|
158
|
-
ROPE_FACTORS_LONG
|
159
|
-
ROPE_FACTORS_SHORT
|
160
|
-
ATTN_Q
|
161
|
-
ATTN_K
|
162
|
-
ATTN_V
|
163
|
-
ATTN_QKV
|
164
|
-
ATTN_OUT
|
165
|
-
ATTN_NORM
|
166
|
-
ATTN_NORM_2
|
167
|
-
ATTN_OUT_NORM
|
168
|
-
ATTN_POST_NORM
|
169
|
-
ATTN_ROT_EMBD
|
170
|
-
FFN_GATE_INP
|
171
|
-
FFN_GATE_INP_SHEXP
|
172
|
-
FFN_NORM
|
173
|
-
FFN_PRE_NORM
|
174
|
-
FFN_POST_NORM
|
175
|
-
FFN_GATE
|
176
|
-
FFN_DOWN
|
177
|
-
FFN_UP
|
178
|
-
FFN_ACT
|
179
|
-
FFN_NORM_EXP
|
180
|
-
FFN_GATE_EXP
|
181
|
-
FFN_DOWN_EXP
|
182
|
-
FFN_UP_EXP
|
183
|
-
FFN_GATE_SHEXP
|
184
|
-
FFN_DOWN_SHEXP
|
185
|
-
FFN_UP_SHEXP
|
186
|
-
ATTN_Q_NORM
|
187
|
-
ATTN_K_NORM
|
188
|
-
LAYER_OUT_NORM
|
189
|
-
SSM_IN
|
190
|
-
SSM_CONV1D
|
191
|
-
SSM_X
|
192
|
-
SSM_DT
|
193
|
-
SSM_A
|
194
|
-
SSM_D
|
195
|
-
SSM_OUT
|
223
|
+
TOKEN_EMBD = auto()
|
224
|
+
TOKEN_EMBD_NORM = auto()
|
225
|
+
TOKEN_TYPES = auto()
|
226
|
+
POS_EMBD = auto()
|
227
|
+
OUTPUT = auto()
|
228
|
+
OUTPUT_NORM = auto()
|
229
|
+
ROPE_FREQS = auto()
|
230
|
+
ROPE_FACTORS_LONG = auto()
|
231
|
+
ROPE_FACTORS_SHORT = auto()
|
232
|
+
ATTN_Q = auto()
|
233
|
+
ATTN_K = auto()
|
234
|
+
ATTN_V = auto()
|
235
|
+
ATTN_QKV = auto()
|
236
|
+
ATTN_OUT = auto()
|
237
|
+
ATTN_NORM = auto()
|
238
|
+
ATTN_NORM_2 = auto()
|
239
|
+
ATTN_OUT_NORM = auto()
|
240
|
+
ATTN_POST_NORM = auto()
|
241
|
+
ATTN_ROT_EMBD = auto()
|
242
|
+
FFN_GATE_INP = auto()
|
243
|
+
FFN_GATE_INP_SHEXP = auto()
|
244
|
+
FFN_NORM = auto()
|
245
|
+
FFN_PRE_NORM = auto()
|
246
|
+
FFN_POST_NORM = auto()
|
247
|
+
FFN_GATE = auto()
|
248
|
+
FFN_DOWN = auto()
|
249
|
+
FFN_UP = auto()
|
250
|
+
FFN_ACT = auto()
|
251
|
+
FFN_NORM_EXP = auto()
|
252
|
+
FFN_GATE_EXP = auto()
|
253
|
+
FFN_DOWN_EXP = auto()
|
254
|
+
FFN_UP_EXP = auto()
|
255
|
+
FFN_GATE_SHEXP = auto()
|
256
|
+
FFN_DOWN_SHEXP = auto()
|
257
|
+
FFN_UP_SHEXP = auto()
|
258
|
+
ATTN_Q_NORM = auto()
|
259
|
+
ATTN_K_NORM = auto()
|
260
|
+
LAYER_OUT_NORM = auto()
|
261
|
+
SSM_IN = auto()
|
262
|
+
SSM_CONV1D = auto()
|
263
|
+
SSM_X = auto()
|
264
|
+
SSM_DT = auto()
|
265
|
+
SSM_A = auto()
|
266
|
+
SSM_D = auto()
|
267
|
+
SSM_OUT = auto()
|
268
|
+
ATTN_Q_A = auto()
|
269
|
+
ATTN_Q_B = auto()
|
270
|
+
ATTN_KV_A_MQA = auto()
|
271
|
+
ATTN_KV_B = auto()
|
272
|
+
ATTN_Q_A_NORM = auto()
|
273
|
+
ATTN_KV_A_NORM = auto()
|
274
|
+
FFN_SUB_NORM = auto()
|
275
|
+
ATTN_SUB_NORM = auto()
|
276
|
+
DEC_ATTN_NORM = auto()
|
277
|
+
DEC_ATTN_Q = auto()
|
278
|
+
DEC_ATTN_K = auto()
|
279
|
+
DEC_ATTN_V = auto()
|
280
|
+
DEC_ATTN_OUT = auto()
|
281
|
+
DEC_ATTN_REL_B = auto()
|
282
|
+
DEC_CROSS_ATTN_NORM = auto()
|
283
|
+
DEC_CROSS_ATTN_Q = auto()
|
284
|
+
DEC_CROSS_ATTN_K = auto()
|
285
|
+
DEC_CROSS_ATTN_V = auto()
|
286
|
+
DEC_CROSS_ATTN_OUT = auto()
|
287
|
+
DEC_CROSS_ATTN_REL_B = auto()
|
288
|
+
DEC_FFN_NORM = auto()
|
289
|
+
DEC_FFN_GATE = auto()
|
290
|
+
DEC_FFN_DOWN = auto()
|
291
|
+
DEC_FFN_UP = auto()
|
292
|
+
DEC_OUTPUT_NORM = auto()
|
293
|
+
ENC_ATTN_NORM = auto()
|
294
|
+
ENC_ATTN_Q = auto()
|
295
|
+
ENC_ATTN_K = auto()
|
296
|
+
ENC_ATTN_V = auto()
|
297
|
+
ENC_ATTN_OUT = auto()
|
298
|
+
ENC_ATTN_REL_B = auto()
|
299
|
+
ENC_FFN_NORM = auto()
|
300
|
+
ENC_FFN_GATE = auto()
|
301
|
+
ENC_FFN_DOWN = auto()
|
302
|
+
ENC_FFN_UP = auto()
|
303
|
+
ENC_OUTPUT_NORM = auto()
|
196
304
|
|
197
305
|
|
198
306
|
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
@@ -229,55 +337,97 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|
229
337
|
MODEL_ARCH.COMMAND_R: "command-r",
|
230
338
|
MODEL_ARCH.DBRX: "dbrx",
|
231
339
|
MODEL_ARCH.OLMO: "olmo",
|
340
|
+
MODEL_ARCH.OPENELM: "openelm",
|
232
341
|
MODEL_ARCH.ARCTIC: "arctic",
|
342
|
+
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
343
|
+
MODEL_ARCH.CHATGLM: "chatglm",
|
344
|
+
MODEL_ARCH.BITNET: "bitnet",
|
345
|
+
MODEL_ARCH.T5: "t5",
|
346
|
+
MODEL_ARCH.JAIS: "jais",
|
233
347
|
}
|
234
348
|
|
235
349
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
236
|
-
MODEL_TENSOR.TOKEN_EMBD:
|
237
|
-
MODEL_TENSOR.TOKEN_EMBD_NORM:
|
238
|
-
MODEL_TENSOR.TOKEN_TYPES:
|
239
|
-
MODEL_TENSOR.POS_EMBD:
|
240
|
-
MODEL_TENSOR.OUTPUT_NORM:
|
241
|
-
MODEL_TENSOR.OUTPUT:
|
242
|
-
MODEL_TENSOR.ROPE_FREQS:
|
243
|
-
MODEL_TENSOR.ROPE_FACTORS_LONG:
|
244
|
-
MODEL_TENSOR.ROPE_FACTORS_SHORT:
|
245
|
-
MODEL_TENSOR.ATTN_NORM:
|
246
|
-
MODEL_TENSOR.ATTN_NORM_2:
|
247
|
-
MODEL_TENSOR.ATTN_QKV:
|
248
|
-
MODEL_TENSOR.ATTN_Q:
|
249
|
-
MODEL_TENSOR.ATTN_K:
|
250
|
-
MODEL_TENSOR.ATTN_V:
|
251
|
-
MODEL_TENSOR.ATTN_OUT:
|
252
|
-
MODEL_TENSOR.ATTN_ROT_EMBD:
|
253
|
-
MODEL_TENSOR.ATTN_Q_NORM:
|
254
|
-
MODEL_TENSOR.ATTN_K_NORM:
|
255
|
-
MODEL_TENSOR.ATTN_OUT_NORM:
|
350
|
+
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
351
|
+
MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
|
352
|
+
MODEL_TENSOR.TOKEN_TYPES: "token_types",
|
353
|
+
MODEL_TENSOR.POS_EMBD: "position_embd",
|
354
|
+
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
355
|
+
MODEL_TENSOR.OUTPUT: "output",
|
356
|
+
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
|
357
|
+
MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
|
358
|
+
MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
|
359
|
+
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
360
|
+
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
|
361
|
+
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
362
|
+
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
|
363
|
+
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
|
364
|
+
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
|
365
|
+
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
366
|
+
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
|
367
|
+
MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
|
368
|
+
MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
|
369
|
+
MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
|
256
370
|
MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
|
257
|
-
MODEL_TENSOR.FFN_GATE_INP:
|
258
|
-
MODEL_TENSOR.FFN_GATE_INP_SHEXP:
|
259
|
-
MODEL_TENSOR.FFN_NORM:
|
260
|
-
MODEL_TENSOR.FFN_PRE_NORM:
|
261
|
-
MODEL_TENSOR.FFN_POST_NORM:
|
262
|
-
MODEL_TENSOR.FFN_GATE:
|
263
|
-
MODEL_TENSOR.FFN_DOWN:
|
264
|
-
MODEL_TENSOR.FFN_UP:
|
265
|
-
MODEL_TENSOR.FFN_GATE_SHEXP:
|
266
|
-
MODEL_TENSOR.FFN_DOWN_SHEXP:
|
267
|
-
MODEL_TENSOR.FFN_UP_SHEXP:
|
268
|
-
MODEL_TENSOR.FFN_ACT:
|
269
|
-
MODEL_TENSOR.FFN_NORM_EXP:
|
270
|
-
MODEL_TENSOR.FFN_GATE_EXP:
|
271
|
-
MODEL_TENSOR.FFN_DOWN_EXP:
|
272
|
-
MODEL_TENSOR.FFN_UP_EXP:
|
273
|
-
MODEL_TENSOR.LAYER_OUT_NORM:
|
274
|
-
MODEL_TENSOR.SSM_IN:
|
275
|
-
MODEL_TENSOR.SSM_CONV1D:
|
276
|
-
MODEL_TENSOR.SSM_X:
|
277
|
-
MODEL_TENSOR.SSM_DT:
|
278
|
-
MODEL_TENSOR.SSM_A:
|
279
|
-
MODEL_TENSOR.SSM_D:
|
280
|
-
MODEL_TENSOR.SSM_OUT:
|
371
|
+
MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
|
372
|
+
MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
|
373
|
+
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
374
|
+
MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
|
375
|
+
MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
|
376
|
+
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
377
|
+
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
378
|
+
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
379
|
+
MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
|
380
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
|
381
|
+
MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
|
382
|
+
MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
|
383
|
+
MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
|
384
|
+
MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
|
385
|
+
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
|
386
|
+
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
|
387
|
+
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
|
388
|
+
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
|
389
|
+
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
|
390
|
+
MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
|
391
|
+
MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
|
392
|
+
MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
|
393
|
+
MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
|
394
|
+
MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
|
395
|
+
MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
|
396
|
+
MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
|
397
|
+
MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
|
398
|
+
MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
|
399
|
+
MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
|
400
|
+
MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
|
401
|
+
MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
|
402
|
+
MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
|
403
|
+
MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
|
404
|
+
MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
|
405
|
+
MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
|
406
|
+
MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
|
407
|
+
MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
|
408
|
+
MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
|
409
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
|
410
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
|
411
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
|
412
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
|
413
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
|
414
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
|
415
|
+
MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
|
416
|
+
MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
|
417
|
+
MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
|
418
|
+
MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
|
419
|
+
MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
|
420
|
+
MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
|
421
|
+
MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
|
422
|
+
MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
|
423
|
+
MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
|
424
|
+
MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
|
425
|
+
MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
|
426
|
+
MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
|
427
|
+
MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
|
428
|
+
MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
|
429
|
+
MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
|
430
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
|
281
431
|
}
|
282
432
|
|
283
433
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
@@ -406,6 +556,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
406
556
|
MODEL_TENSOR.TOKEN_EMBD,
|
407
557
|
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
408
558
|
MODEL_TENSOR.TOKEN_TYPES,
|
559
|
+
MODEL_TENSOR.ATTN_NORM_2,
|
409
560
|
MODEL_TENSOR.ATTN_OUT_NORM,
|
410
561
|
MODEL_TENSOR.ATTN_Q,
|
411
562
|
MODEL_TENSOR.ATTN_Q_NORM,
|
@@ -636,6 +787,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
636
787
|
],
|
637
788
|
MODEL_ARCH.MINICPM: [
|
638
789
|
MODEL_TENSOR.TOKEN_EMBD,
|
790
|
+
MODEL_TENSOR.OUTPUT,
|
639
791
|
MODEL_TENSOR.OUTPUT_NORM,
|
640
792
|
MODEL_TENSOR.ROPE_FREQS,
|
641
793
|
MODEL_TENSOR.ATTN_NORM,
|
@@ -763,6 +915,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
763
915
|
MODEL_TENSOR.FFN_DOWN,
|
764
916
|
MODEL_TENSOR.FFN_UP,
|
765
917
|
],
|
918
|
+
MODEL_ARCH.OPENELM: [
|
919
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
920
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
921
|
+
MODEL_TENSOR.ATTN_NORM,
|
922
|
+
MODEL_TENSOR.ATTN_QKV,
|
923
|
+
MODEL_TENSOR.ATTN_Q_NORM,
|
924
|
+
MODEL_TENSOR.ATTN_K_NORM,
|
925
|
+
MODEL_TENSOR.ATTN_OUT,
|
926
|
+
MODEL_TENSOR.FFN_NORM,
|
927
|
+
MODEL_TENSOR.FFN_GATE,
|
928
|
+
MODEL_TENSOR.FFN_DOWN,
|
929
|
+
MODEL_TENSOR.FFN_UP,
|
930
|
+
],
|
766
931
|
MODEL_ARCH.ARCTIC: [
|
767
932
|
MODEL_TENSOR.TOKEN_EMBD,
|
768
933
|
MODEL_TENSOR.OUTPUT_NORM,
|
@@ -784,6 +949,104 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
784
949
|
MODEL_TENSOR.FFN_DOWN_EXP,
|
785
950
|
MODEL_TENSOR.FFN_UP_EXP,
|
786
951
|
],
|
952
|
+
MODEL_ARCH.DEEPSEEK2: [
|
953
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
954
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
955
|
+
MODEL_TENSOR.OUTPUT,
|
956
|
+
MODEL_TENSOR.ROPE_FREQS,
|
957
|
+
MODEL_TENSOR.ATTN_NORM,
|
958
|
+
MODEL_TENSOR.ATTN_Q,
|
959
|
+
MODEL_TENSOR.ATTN_Q_A,
|
960
|
+
MODEL_TENSOR.ATTN_Q_B,
|
961
|
+
MODEL_TENSOR.ATTN_KV_A_MQA,
|
962
|
+
MODEL_TENSOR.ATTN_KV_B,
|
963
|
+
MODEL_TENSOR.ATTN_Q_A_NORM,
|
964
|
+
MODEL_TENSOR.ATTN_KV_A_NORM,
|
965
|
+
MODEL_TENSOR.ATTN_OUT,
|
966
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
967
|
+
MODEL_TENSOR.FFN_GATE_INP,
|
968
|
+
MODEL_TENSOR.FFN_NORM,
|
969
|
+
MODEL_TENSOR.FFN_GATE,
|
970
|
+
MODEL_TENSOR.FFN_DOWN,
|
971
|
+
MODEL_TENSOR.FFN_UP,
|
972
|
+
MODEL_TENSOR.FFN_GATE_EXP,
|
973
|
+
MODEL_TENSOR.FFN_DOWN_EXP,
|
974
|
+
MODEL_TENSOR.FFN_UP_EXP,
|
975
|
+
MODEL_TENSOR.FFN_GATE_SHEXP,
|
976
|
+
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
977
|
+
MODEL_TENSOR.FFN_UP_SHEXP,
|
978
|
+
],
|
979
|
+
MODEL_ARCH.CHATGLM : [
|
980
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
981
|
+
MODEL_TENSOR.ROPE_FREQS,
|
982
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
983
|
+
MODEL_TENSOR.OUTPUT,
|
984
|
+
MODEL_TENSOR.ATTN_NORM,
|
985
|
+
MODEL_TENSOR.ATTN_QKV,
|
986
|
+
MODEL_TENSOR.ATTN_OUT,
|
987
|
+
MODEL_TENSOR.FFN_NORM,
|
988
|
+
MODEL_TENSOR.FFN_DOWN,
|
989
|
+
MODEL_TENSOR.FFN_UP,
|
990
|
+
],
|
991
|
+
MODEL_ARCH.BITNET: [
|
992
|
+
MODEL_TENSOR.ATTN_Q,
|
993
|
+
MODEL_TENSOR.ATTN_K,
|
994
|
+
MODEL_TENSOR.ATTN_V,
|
995
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
996
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
997
|
+
MODEL_TENSOR.ATTN_NORM,
|
998
|
+
MODEL_TENSOR.ATTN_OUT,
|
999
|
+
MODEL_TENSOR.FFN_NORM,
|
1000
|
+
MODEL_TENSOR.FFN_GATE,
|
1001
|
+
MODEL_TENSOR.FFN_DOWN,
|
1002
|
+
MODEL_TENSOR.FFN_UP,
|
1003
|
+
MODEL_TENSOR.ATTN_SUB_NORM,
|
1004
|
+
MODEL_TENSOR.FFN_SUB_NORM,
|
1005
|
+
],
|
1006
|
+
MODEL_ARCH.T5: [
|
1007
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1008
|
+
MODEL_TENSOR.OUTPUT,
|
1009
|
+
MODEL_TENSOR.DEC_ATTN_NORM,
|
1010
|
+
MODEL_TENSOR.DEC_ATTN_Q,
|
1011
|
+
MODEL_TENSOR.DEC_ATTN_K,
|
1012
|
+
MODEL_TENSOR.DEC_ATTN_V,
|
1013
|
+
MODEL_TENSOR.DEC_ATTN_OUT,
|
1014
|
+
MODEL_TENSOR.DEC_ATTN_REL_B,
|
1015
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_NORM,
|
1016
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_Q,
|
1017
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_K,
|
1018
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_V,
|
1019
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_OUT,
|
1020
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B,
|
1021
|
+
MODEL_TENSOR.DEC_FFN_NORM,
|
1022
|
+
MODEL_TENSOR.DEC_FFN_GATE,
|
1023
|
+
MODEL_TENSOR.DEC_FFN_DOWN,
|
1024
|
+
MODEL_TENSOR.DEC_FFN_UP,
|
1025
|
+
MODEL_TENSOR.DEC_OUTPUT_NORM,
|
1026
|
+
MODEL_TENSOR.ENC_ATTN_NORM,
|
1027
|
+
MODEL_TENSOR.ENC_ATTN_Q,
|
1028
|
+
MODEL_TENSOR.ENC_ATTN_K,
|
1029
|
+
MODEL_TENSOR.ENC_ATTN_V,
|
1030
|
+
MODEL_TENSOR.ENC_ATTN_OUT,
|
1031
|
+
MODEL_TENSOR.ENC_ATTN_REL_B,
|
1032
|
+
MODEL_TENSOR.ENC_FFN_NORM,
|
1033
|
+
MODEL_TENSOR.ENC_FFN_GATE,
|
1034
|
+
MODEL_TENSOR.ENC_FFN_DOWN,
|
1035
|
+
MODEL_TENSOR.ENC_FFN_UP,
|
1036
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM,
|
1037
|
+
],
|
1038
|
+
MODEL_ARCH.JAIS: [
|
1039
|
+
MODEL_TENSOR.TOKEN_EMBD,
|
1040
|
+
MODEL_TENSOR.OUTPUT_NORM,
|
1041
|
+
MODEL_TENSOR.OUTPUT,
|
1042
|
+
MODEL_TENSOR.ATTN_NORM,
|
1043
|
+
MODEL_TENSOR.ATTN_QKV,
|
1044
|
+
MODEL_TENSOR.ATTN_OUT,
|
1045
|
+
MODEL_TENSOR.FFN_NORM,
|
1046
|
+
MODEL_TENSOR.FFN_DOWN,
|
1047
|
+
MODEL_TENSOR.FFN_GATE,
|
1048
|
+
MODEL_TENSOR.FFN_UP,
|
1049
|
+
],
|
787
1050
|
# TODO
|
788
1051
|
}
|
789
1052
|
|
@@ -817,6 +1080,13 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|
817
1080
|
MODEL_TENSOR.ROPE_FREQS,
|
818
1081
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
819
1082
|
],
|
1083
|
+
MODEL_ARCH.DEEPSEEK2: [
|
1084
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1085
|
+
MODEL_TENSOR.ATTN_ROT_EMBD,
|
1086
|
+
],
|
1087
|
+
MODEL_ARCH.CHATGLM: [
|
1088
|
+
MODEL_TENSOR.ROPE_FREQS,
|
1089
|
+
],
|
820
1090
|
}
|
821
1091
|
|
822
1092
|
#
|
@@ -1004,7 +1274,6 @@ KEY_GENERAL_URL = Keys.General.URL
|
|
1004
1274
|
KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
|
1005
1275
|
KEY_GENERAL_LICENSE = Keys.General.LICENSE
|
1006
1276
|
KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
|
1007
|
-
KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
|
1008
1277
|
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
|
1009
1278
|
|
1010
1279
|
# LLM
|
@@ -1057,4 +1326,4 @@ KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
|
|
1057
1326
|
KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
|
1058
1327
|
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
|
1059
1328
|
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
|
1060
|
-
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
|
1329
|
+
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
|