bigdl-core-cpp 2.5.0b20240724__py3-none-win_amd64.whl → 2.5.0b20240726__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. bigdl/cpp/convert-hf-to-gguf.py +1148 -315
  2. bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
  3. bigdl/cpp/gguf-py/gguf/constants.py +463 -167
  4. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +475 -156
  7. bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
  8. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +209 -23
  9. bigdl/cpp/libs/baby-llama.exe +0 -0
  10. bigdl/cpp/libs/batched-bench.exe +0 -0
  11. bigdl/cpp/libs/batched.exe +0 -0
  12. bigdl/cpp/libs/beam-search.exe +0 -0
  13. bigdl/cpp/libs/benchmark.exe +0 -0
  14. bigdl/cpp/libs/common.lib +0 -0
  15. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  16. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  17. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  18. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  19. bigdl/cpp/libs/embedding.exe +0 -0
  20. bigdl/cpp/libs/export-lora.exe +0 -0
  21. bigdl/cpp/libs/finetune.exe +0 -0
  22. bigdl/cpp/libs/ggml_shared.dll +0 -0
  23. bigdl/cpp/libs/gguf.exe +0 -0
  24. bigdl/cpp/libs/gritlm.exe +0 -0
  25. bigdl/cpp/libs/imatrix.exe +0 -0
  26. bigdl/cpp/libs/infill.exe +0 -0
  27. bigdl/cpp/libs/llama-bench.exe +0 -0
  28. bigdl/cpp/libs/llama.dll +0 -0
  29. bigdl/cpp/libs/llava-cli.exe +0 -0
  30. bigdl/cpp/libs/llava_shared.dll +0 -0
  31. bigdl/cpp/libs/lookahead.exe +0 -0
  32. bigdl/cpp/libs/lookup.exe +0 -0
  33. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  34. bigdl/cpp/libs/main.exe +0 -0
  35. bigdl/cpp/libs/ollama.exe +0 -0
  36. bigdl/cpp/libs/parallel.exe +0 -0
  37. bigdl/cpp/libs/passkey.exe +0 -0
  38. bigdl/cpp/libs/perplexity.exe +0 -0
  39. bigdl/cpp/libs/q8dot.exe +0 -0
  40. bigdl/cpp/libs/quantize-stats.exe +0 -0
  41. bigdl/cpp/libs/quantize.exe +0 -0
  42. bigdl/cpp/libs/save-load-state.exe +0 -0
  43. bigdl/cpp/libs/server.exe +0 -0
  44. bigdl/cpp/libs/simple.exe +0 -0
  45. bigdl/cpp/libs/speculative.exe +0 -0
  46. bigdl/cpp/libs/tokenize.exe +0 -0
  47. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  48. bigdl/cpp/libs/vdot.exe +0 -0
  49. {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/METADATA +1 -1
  50. bigdl_core_cpp-2.5.0b20240726.dist-info/RECORD +61 -0
  51. bigdl_core_cpp-2.5.0b20240724.dist-info/RECORD +0 -61
  52. {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-llama-cpp.bat +0 -0
  53. {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-llama-cpp.ps1 +0 -0
  54. {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-ollama.bat +0 -0
  55. {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/WHEEL +0 -0
  56. {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/top_level.txt +0 -0
@@ -19,31 +19,81 @@ GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
19
19
 
20
20
  class Keys:
21
21
  class General:
22
- ARCHITECTURE = "general.architecture"
23
- QUANTIZATION_VERSION = "general.quantization_version"
24
- ALIGNMENT = "general.alignment"
25
- NAME = "general.name"
26
- AUTHOR = "general.author"
27
- VERSION = "general.version"
28
- URL = "general.url"
29
- DESCRIPTION = "general.description"
30
- LICENSE = "general.license"
31
- SOURCE_URL = "general.source.url"
32
- SOURCE_HF_REPO = "general.source.huggingface.repository"
33
- FILE_TYPE = "general.file_type"
22
+ TYPE = "general.type"
23
+ ARCHITECTURE = "general.architecture"
24
+ QUANTIZATION_VERSION = "general.quantization_version"
25
+ ALIGNMENT = "general.alignment"
26
+ FILE_TYPE = "general.file_type"
27
+
28
+ # Authorship Metadata
29
+ NAME = "general.name"
30
+ AUTHOR = "general.author"
31
+ VERSION = "general.version"
32
+ ORGANIZATION = "general.organization"
33
+
34
+ FINETUNE = "general.finetune"
35
+ BASENAME = "general.basename"
36
+
37
+ DESCRIPTION = "general.description"
38
+ QUANTIZED_BY = "general.quantized_by"
39
+
40
+ SIZE_LABEL = "general.size_label"
41
+
42
+ # Licensing details
43
+ LICENSE = "general.license"
44
+ LICENSE_NAME = "general.license.name"
45
+ LICENSE_LINK = "general.license.link"
46
+
47
+ # Typically represents the converted GGUF repo (Unless native)
48
+ URL = "general.url" # Model Website/Paper
49
+ DOI = "general.doi"
50
+ UUID = "general.uuid"
51
+ REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)
52
+
53
+ # Model Source during conversion
54
+ SOURCE_URL = "general.source.url" # Model Website/Paper
55
+ SOURCE_DOI = "general.source.doi"
56
+ SOURCE_UUID = "general.source.uuid"
57
+ SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)
58
+
59
+ # Base Model Source. There can be more than one source if it's a merged
60
+ # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
61
+ # tracing lineage of models as it is finetuned or merged over time.
62
+ BASE_MODEL_COUNT = "general.base_model.count"
63
+ BASE_MODEL_NAME = "general.base_model.{id}.name"
64
+ BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
65
+ BASE_MODEL_VERSION = "general.base_model.{id}.version"
66
+ BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
67
+ BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
68
+ BASE_MODEL_DOI = "general.base_model.{id}.doi"
69
+ BASE_MODEL_UUID = "general.base_model.{id}.uuid"
70
+ BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
71
+
72
+ # Array based KV stores
73
+ TAGS = "general.tags"
74
+ LANGUAGES = "general.languages"
75
+ DATASETS = "general.datasets"
34
76
 
35
77
  class LLM:
36
- VOCAB_SIZE = "{arch}.vocab_size"
37
- CONTEXT_LENGTH = "{arch}.context_length"
38
- EMBEDDING_LENGTH = "{arch}.embedding_length"
39
- BLOCK_COUNT = "{arch}.block_count"
40
- FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
41
- USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
42
- TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
43
- EXPERT_COUNT = "{arch}.expert_count"
44
- EXPERT_USED_COUNT = "{arch}.expert_used_count"
45
- POOLING_TYPE = "{arch}.pooling_type"
46
- LOGIT_SCALE = "{arch}.logit_scale"
78
+ VOCAB_SIZE = "{arch}.vocab_size"
79
+ CONTEXT_LENGTH = "{arch}.context_length"
80
+ EMBEDDING_LENGTH = "{arch}.embedding_length"
81
+ BLOCK_COUNT = "{arch}.block_count"
82
+ LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
83
+ FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
84
+ EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
85
+ EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
86
+ USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
87
+ TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
88
+ EXPERT_COUNT = "{arch}.expert_count"
89
+ EXPERT_USED_COUNT = "{arch}.expert_used_count"
90
+ EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
91
+ EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
92
+ POOLING_TYPE = "{arch}.pooling_type"
93
+ LOGIT_SCALE = "{arch}.logit_scale"
94
+ DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
95
+ ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
96
+ FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
47
97
 
48
98
  class Attention:
49
99
  HEAD_COUNT = "{arch}.attention.head_count"
@@ -55,6 +105,10 @@ class Keys:
55
105
  LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
56
106
  LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
57
107
  CAUSAL = "{arch}.attention.causal"
108
+ Q_LORA_RANK = "{arch}.attention.q_lora_rank"
109
+ KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
110
+ REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
111
+ SLIDING_WINDOW = "{arch}.attention.sliding_window"
58
112
 
59
113
  class Rope:
60
114
  DIMENSION_COUNT = "{arch}.rope.dimension_count"
@@ -64,6 +118,12 @@ class Keys:
64
118
  SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"
65
119
  SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
66
120
  SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
121
+ SCALING_YARN_LOG_MUL = "{arch}.rope.scaling.yarn_log_multiplier"
122
+
123
+ class Split:
124
+ LLM_KV_SPLIT_NO = "split.no"
125
+ LLM_KV_SPLIT_COUNT = "split.count"
126
+ LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"
67
127
 
68
128
  class SSM:
69
129
  CONV_KERNEL = "{arch}.ssm.conv_kernel"
@@ -72,119 +132,175 @@ class Keys:
72
132
  TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
73
133
 
74
134
  class Tokenizer:
75
- MODEL = "tokenizer.ggml.model"
76
- PRE = "tokenizer.ggml.pre"
77
- LIST = "tokenizer.ggml.tokens"
78
- TOKEN_TYPE = "tokenizer.ggml.token_type"
79
- TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
80
- SCORES = "tokenizer.ggml.scores"
81
- MERGES = "tokenizer.ggml.merges"
82
- BOS_ID = "tokenizer.ggml.bos_token_id"
83
- EOS_ID = "tokenizer.ggml.eos_token_id"
84
- UNK_ID = "tokenizer.ggml.unknown_token_id"
85
- SEP_ID = "tokenizer.ggml.seperator_token_id"
86
- PAD_ID = "tokenizer.ggml.padding_token_id"
87
- CLS_ID = "tokenizer.ggml.cls_token_id"
88
- MASK_ID = "tokenizer.ggml.mask_token_id"
89
- ADD_BOS = "tokenizer.ggml.add_bos_token"
90
- ADD_EOS = "tokenizer.ggml.add_eos_token"
91
- ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
92
- HF_JSON = "tokenizer.huggingface.json"
93
- RWKV = "tokenizer.rwkv.world"
94
- CHAT_TEMPLATE = "tokenizer.chat_template"
95
- CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
96
- CHAT_TEMPLATES = "tokenizer.chat_templates"
135
+ MODEL = "tokenizer.ggml.model"
136
+ PRE = "tokenizer.ggml.pre"
137
+ LIST = "tokenizer.ggml.tokens"
138
+ TOKEN_TYPE = "tokenizer.ggml.token_type"
139
+ TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
140
+ SCORES = "tokenizer.ggml.scores"
141
+ MERGES = "tokenizer.ggml.merges"
142
+ BOS_ID = "tokenizer.ggml.bos_token_id"
143
+ EOS_ID = "tokenizer.ggml.eos_token_id"
144
+ UNK_ID = "tokenizer.ggml.unknown_token_id"
145
+ SEP_ID = "tokenizer.ggml.seperator_token_id"
146
+ PAD_ID = "tokenizer.ggml.padding_token_id"
147
+ CLS_ID = "tokenizer.ggml.cls_token_id"
148
+ MASK_ID = "tokenizer.ggml.mask_token_id"
149
+ ADD_BOS = "tokenizer.ggml.add_bos_token"
150
+ ADD_EOS = "tokenizer.ggml.add_eos_token"
151
+ ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
152
+ REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
153
+ PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
154
+ HF_JSON = "tokenizer.huggingface.json"
155
+ RWKV = "tokenizer.rwkv.world"
156
+ CHAT_TEMPLATE = "tokenizer.chat_template"
157
+ CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
158
+ CHAT_TEMPLATES = "tokenizer.chat_templates"
97
159
  # FIM/Infill special tokens constants
98
- PREFIX_ID = "tokenizer.ggml.prefix_token_id"
99
- SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
100
- MIDDLE_ID = "tokenizer.ggml.middle_token_id"
101
- EOT_ID = "tokenizer.ggml.eot_token_id"
160
+ PREFIX_ID = "tokenizer.ggml.prefix_token_id"
161
+ SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
162
+ MIDDLE_ID = "tokenizer.ggml.middle_token_id"
163
+ EOT_ID = "tokenizer.ggml.eot_token_id"
102
164
 
165
+ class Adapter:
166
+ TYPE = "adapter.type"
167
+ LORA_ALPHA = "adapter.lora.alpha"
103
168
 
104
169
  #
105
170
  # recommended mapping of model tensor names for storage in gguf
106
171
  #
107
172
 
108
173
 
174
+ class GGUFType:
175
+ MODEL = "model"
176
+ ADAPTER = "adapter"
177
+
178
+
109
179
  class MODEL_ARCH(IntEnum):
110
- LLAMA = auto()
111
- FALCON = auto()
112
- BAICHUAN = auto()
113
- GROK = auto()
114
- GPT2 = auto()
115
- GPTJ = auto()
116
- GPTNEOX = auto()
117
- MPT = auto()
118
- STARCODER = auto()
119
- REFACT = auto()
120
- BERT = auto()
121
- NOMIC_BERT = auto()
180
+ LLAMA = auto()
181
+ FALCON = auto()
182
+ BAICHUAN = auto()
183
+ GROK = auto()
184
+ GPT2 = auto()
185
+ GPTJ = auto()
186
+ GPTNEOX = auto()
187
+ MPT = auto()
188
+ STARCODER = auto()
189
+ REFACT = auto()
190
+ BERT = auto()
191
+ NOMIC_BERT = auto()
122
192
  JINA_BERT_V2 = auto()
123
- BLOOM = auto()
124
- STABLELM = auto()
125
- QWEN = auto()
126
- QWEN2 = auto()
127
- QWEN2MOE = auto()
128
- PHI2 = auto()
129
- PHI3 = auto()
130
- PLAMO = auto()
131
- CODESHELL = auto()
132
- ORION = auto()
133
- INTERNLM2 = auto()
134
- MINICPM = auto()
135
- GEMMA = auto()
136
- STARCODER2 = auto()
137
- MAMBA = auto()
138
- XVERSE = auto()
139
- COMMAND_R = auto()
140
- DBRX = auto()
141
- OLMO = auto()
142
- ARCTIC = auto()
193
+ BLOOM = auto()
194
+ STABLELM = auto()
195
+ QWEN = auto()
196
+ QWEN2 = auto()
197
+ QWEN2MOE = auto()
198
+ PHI2 = auto()
199
+ PHI3 = auto()
200
+ PLAMO = auto()
201
+ CODESHELL = auto()
202
+ ORION = auto()
203
+ INTERNLM2 = auto()
204
+ MINICPM = auto()
205
+ GEMMA = auto()
206
+ GEMMA2 = auto()
207
+ STARCODER2 = auto()
208
+ MAMBA = auto()
209
+ XVERSE = auto()
210
+ COMMAND_R = auto()
211
+ DBRX = auto()
212
+ OLMO = auto()
213
+ OPENELM = auto()
214
+ ARCTIC = auto()
215
+ DEEPSEEK2 = auto()
216
+ CHATGLM = auto()
217
+ BITNET = auto()
218
+ T5 = auto()
219
+ JAIS = auto()
143
220
 
144
221
 
145
222
  class MODEL_TENSOR(IntEnum):
146
- TOKEN_EMBD = auto()
147
- TOKEN_EMBD_NORM = auto()
148
- TOKEN_TYPES = auto()
149
- POS_EMBD = auto()
150
- OUTPUT = auto()
151
- OUTPUT_NORM = auto()
152
- ROPE_FREQS = auto()
153
- ROPE_FACTORS_LONG = auto()
154
- ROPE_FACTORS_SHORT = auto()
155
- ATTN_Q = auto()
156
- ATTN_K = auto()
157
- ATTN_V = auto()
158
- ATTN_QKV = auto()
159
- ATTN_OUT = auto()
160
- ATTN_NORM = auto()
161
- ATTN_NORM_2 = auto()
162
- ATTN_OUT_NORM = auto()
163
- ATTN_ROT_EMBD = auto()
164
- FFN_GATE_INP = auto()
165
- FFN_GATE_INP_SHEXP = auto()
166
- FFN_NORM = auto()
167
- FFN_GATE = auto()
168
- FFN_DOWN = auto()
169
- FFN_UP = auto()
170
- FFN_ACT = auto()
171
- FFN_NORM_EXP = auto()
172
- FFN_GATE_EXP = auto()
173
- FFN_DOWN_EXP = auto()
174
- FFN_UP_EXP = auto()
175
- FFN_GATE_SHEXP = auto()
176
- FFN_DOWN_SHEXP = auto()
177
- FFN_UP_SHEXP = auto()
178
- ATTN_Q_NORM = auto()
179
- ATTN_K_NORM = auto()
180
- LAYER_OUT_NORM = auto()
181
- SSM_IN = auto()
182
- SSM_CONV1D = auto()
183
- SSM_X = auto()
184
- SSM_DT = auto()
185
- SSM_A = auto()
186
- SSM_D = auto()
187
- SSM_OUT = auto()
223
+ TOKEN_EMBD = auto()
224
+ TOKEN_EMBD_NORM = auto()
225
+ TOKEN_TYPES = auto()
226
+ POS_EMBD = auto()
227
+ OUTPUT = auto()
228
+ OUTPUT_NORM = auto()
229
+ ROPE_FREQS = auto()
230
+ ROPE_FACTORS_LONG = auto()
231
+ ROPE_FACTORS_SHORT = auto()
232
+ ATTN_Q = auto()
233
+ ATTN_K = auto()
234
+ ATTN_V = auto()
235
+ ATTN_QKV = auto()
236
+ ATTN_OUT = auto()
237
+ ATTN_NORM = auto()
238
+ ATTN_NORM_2 = auto()
239
+ ATTN_OUT_NORM = auto()
240
+ ATTN_POST_NORM = auto()
241
+ ATTN_ROT_EMBD = auto()
242
+ FFN_GATE_INP = auto()
243
+ FFN_GATE_INP_SHEXP = auto()
244
+ FFN_NORM = auto()
245
+ FFN_PRE_NORM = auto()
246
+ FFN_POST_NORM = auto()
247
+ FFN_GATE = auto()
248
+ FFN_DOWN = auto()
249
+ FFN_UP = auto()
250
+ FFN_ACT = auto()
251
+ FFN_NORM_EXP = auto()
252
+ FFN_GATE_EXP = auto()
253
+ FFN_DOWN_EXP = auto()
254
+ FFN_UP_EXP = auto()
255
+ FFN_GATE_SHEXP = auto()
256
+ FFN_DOWN_SHEXP = auto()
257
+ FFN_UP_SHEXP = auto()
258
+ ATTN_Q_NORM = auto()
259
+ ATTN_K_NORM = auto()
260
+ LAYER_OUT_NORM = auto()
261
+ SSM_IN = auto()
262
+ SSM_CONV1D = auto()
263
+ SSM_X = auto()
264
+ SSM_DT = auto()
265
+ SSM_A = auto()
266
+ SSM_D = auto()
267
+ SSM_OUT = auto()
268
+ ATTN_Q_A = auto()
269
+ ATTN_Q_B = auto()
270
+ ATTN_KV_A_MQA = auto()
271
+ ATTN_KV_B = auto()
272
+ ATTN_Q_A_NORM = auto()
273
+ ATTN_KV_A_NORM = auto()
274
+ FFN_SUB_NORM = auto()
275
+ ATTN_SUB_NORM = auto()
276
+ DEC_ATTN_NORM = auto()
277
+ DEC_ATTN_Q = auto()
278
+ DEC_ATTN_K = auto()
279
+ DEC_ATTN_V = auto()
280
+ DEC_ATTN_OUT = auto()
281
+ DEC_ATTN_REL_B = auto()
282
+ DEC_CROSS_ATTN_NORM = auto()
283
+ DEC_CROSS_ATTN_Q = auto()
284
+ DEC_CROSS_ATTN_K = auto()
285
+ DEC_CROSS_ATTN_V = auto()
286
+ DEC_CROSS_ATTN_OUT = auto()
287
+ DEC_CROSS_ATTN_REL_B = auto()
288
+ DEC_FFN_NORM = auto()
289
+ DEC_FFN_GATE = auto()
290
+ DEC_FFN_DOWN = auto()
291
+ DEC_FFN_UP = auto()
292
+ DEC_OUTPUT_NORM = auto()
293
+ ENC_ATTN_NORM = auto()
294
+ ENC_ATTN_Q = auto()
295
+ ENC_ATTN_K = auto()
296
+ ENC_ATTN_V = auto()
297
+ ENC_ATTN_OUT = auto()
298
+ ENC_ATTN_REL_B = auto()
299
+ ENC_FFN_NORM = auto()
300
+ ENC_FFN_GATE = auto()
301
+ ENC_FFN_DOWN = auto()
302
+ ENC_FFN_UP = auto()
303
+ ENC_OUTPUT_NORM = auto()
188
304
 
189
305
 
190
306
  MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -214,58 +330,104 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
214
330
  MODEL_ARCH.INTERNLM2: "internlm2",
215
331
  MODEL_ARCH.MINICPM: "minicpm",
216
332
  MODEL_ARCH.GEMMA: "gemma",
333
+ MODEL_ARCH.GEMMA2: "gemma2",
217
334
  MODEL_ARCH.STARCODER2: "starcoder2",
218
335
  MODEL_ARCH.MAMBA: "mamba",
219
336
  MODEL_ARCH.XVERSE: "xverse",
220
337
  MODEL_ARCH.COMMAND_R: "command-r",
221
338
  MODEL_ARCH.DBRX: "dbrx",
222
339
  MODEL_ARCH.OLMO: "olmo",
340
+ MODEL_ARCH.OPENELM: "openelm",
223
341
  MODEL_ARCH.ARCTIC: "arctic",
342
+ MODEL_ARCH.DEEPSEEK2: "deepseek2",
343
+ MODEL_ARCH.CHATGLM: "chatglm",
344
+ MODEL_ARCH.BITNET: "bitnet",
345
+ MODEL_ARCH.T5: "t5",
346
+ MODEL_ARCH.JAIS: "jais",
224
347
  }
225
348
 
226
349
  TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
227
- MODEL_TENSOR.TOKEN_EMBD: "token_embd",
228
- MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
229
- MODEL_TENSOR.TOKEN_TYPES: "token_types",
230
- MODEL_TENSOR.POS_EMBD: "position_embd",
231
- MODEL_TENSOR.OUTPUT_NORM: "output_norm",
232
- MODEL_TENSOR.OUTPUT: "output",
233
- MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
234
- MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
235
- MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
236
- MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
237
- MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
238
- MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
239
- MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
240
- MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
241
- MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
242
- MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
243
- MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
244
- MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
245
- MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
246
- MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
247
- MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
248
- MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
249
- MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
250
- MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
251
- MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
252
- MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
253
- MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
254
- MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
255
- MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
256
- MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
257
- MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
258
- MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
259
- MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
260
- MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
261
- MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
262
- MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
263
- MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
264
- MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
265
- MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
266
- MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
267
- MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
268
- MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
350
+ MODEL_TENSOR.TOKEN_EMBD: "token_embd",
351
+ MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
352
+ MODEL_TENSOR.TOKEN_TYPES: "token_types",
353
+ MODEL_TENSOR.POS_EMBD: "position_embd",
354
+ MODEL_TENSOR.OUTPUT_NORM: "output_norm",
355
+ MODEL_TENSOR.OUTPUT: "output",
356
+ MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
357
+ MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
358
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
359
+ MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
360
+ MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
361
+ MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
362
+ MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
363
+ MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
364
+ MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
365
+ MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
366
+ MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
367
+ MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
368
+ MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
369
+ MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
370
+ MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
371
+ MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
372
+ MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
373
+ MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
374
+ MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
375
+ MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
376
+ MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
377
+ MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
378
+ MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
379
+ MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
380
+ MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
381
+ MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
382
+ MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
383
+ MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
384
+ MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
385
+ MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
386
+ MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
387
+ MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
388
+ MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
389
+ MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
390
+ MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
391
+ MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
392
+ MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
393
+ MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
394
+ MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
395
+ MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
396
+ MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
397
+ MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
398
+ MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
399
+ MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
400
+ MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
401
+ MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
402
+ MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
403
+ MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
404
+ MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
405
+ MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
406
+ MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
407
+ MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
408
+ MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
409
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
410
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
411
+ MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
412
+ MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
413
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
414
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
415
+ MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
416
+ MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
417
+ MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
418
+ MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
419
+ MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
420
+ MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
421
+ MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
422
+ MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
423
+ MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
424
+ MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
425
+ MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
426
+ MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
427
+ MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
428
+ MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
429
+ MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
430
+ MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
269
431
  }
270
432
 
271
433
  MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -394,6 +556,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
394
556
  MODEL_TENSOR.TOKEN_EMBD,
395
557
  MODEL_TENSOR.TOKEN_EMBD_NORM,
396
558
  MODEL_TENSOR.TOKEN_TYPES,
559
+ MODEL_TENSOR.ATTN_NORM_2,
397
560
  MODEL_TENSOR.ATTN_OUT_NORM,
398
561
  MODEL_TENSOR.ATTN_Q,
399
562
  MODEL_TENSOR.ATTN_Q_NORM,
@@ -624,6 +787,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
624
787
  ],
625
788
  MODEL_ARCH.MINICPM: [
626
789
  MODEL_TENSOR.TOKEN_EMBD,
790
+ MODEL_TENSOR.OUTPUT,
627
791
  MODEL_TENSOR.OUTPUT_NORM,
628
792
  MODEL_TENSOR.ROPE_FREQS,
629
793
  MODEL_TENSOR.ATTN_NORM,
@@ -654,6 +818,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
654
818
  MODEL_TENSOR.FFN_UP,
655
819
  MODEL_TENSOR.FFN_NORM,
656
820
  ],
821
+ MODEL_ARCH.GEMMA2: [
822
+ MODEL_TENSOR.TOKEN_EMBD,
823
+ MODEL_TENSOR.OUTPUT_NORM,
824
+ MODEL_TENSOR.ATTN_Q,
825
+ MODEL_TENSOR.ATTN_K,
826
+ MODEL_TENSOR.ATTN_V,
827
+ MODEL_TENSOR.ATTN_OUT,
828
+ MODEL_TENSOR.FFN_GATE,
829
+ MODEL_TENSOR.FFN_DOWN,
830
+ MODEL_TENSOR.FFN_UP,
831
+ MODEL_TENSOR.ATTN_NORM,
832
+ MODEL_TENSOR.ATTN_POST_NORM,
833
+ MODEL_TENSOR.FFN_PRE_NORM,
834
+ MODEL_TENSOR.FFN_POST_NORM,
835
+ ],
657
836
  MODEL_ARCH.STARCODER2: [
658
837
  MODEL_TENSOR.TOKEN_EMBD,
659
838
  MODEL_TENSOR.OUTPUT_NORM,
@@ -736,6 +915,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
736
915
  MODEL_TENSOR.FFN_DOWN,
737
916
  MODEL_TENSOR.FFN_UP,
738
917
  ],
918
+ MODEL_ARCH.OPENELM: [
919
+ MODEL_TENSOR.TOKEN_EMBD,
920
+ MODEL_TENSOR.OUTPUT_NORM,
921
+ MODEL_TENSOR.ATTN_NORM,
922
+ MODEL_TENSOR.ATTN_QKV,
923
+ MODEL_TENSOR.ATTN_Q_NORM,
924
+ MODEL_TENSOR.ATTN_K_NORM,
925
+ MODEL_TENSOR.ATTN_OUT,
926
+ MODEL_TENSOR.FFN_NORM,
927
+ MODEL_TENSOR.FFN_GATE,
928
+ MODEL_TENSOR.FFN_DOWN,
929
+ MODEL_TENSOR.FFN_UP,
930
+ ],
739
931
  MODEL_ARCH.ARCTIC: [
740
932
  MODEL_TENSOR.TOKEN_EMBD,
741
933
  MODEL_TENSOR.OUTPUT_NORM,
@@ -757,6 +949,104 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
757
949
  MODEL_TENSOR.FFN_DOWN_EXP,
758
950
  MODEL_TENSOR.FFN_UP_EXP,
759
951
  ],
952
+ MODEL_ARCH.DEEPSEEK2: [
953
+ MODEL_TENSOR.TOKEN_EMBD,
954
+ MODEL_TENSOR.OUTPUT_NORM,
955
+ MODEL_TENSOR.OUTPUT,
956
+ MODEL_TENSOR.ROPE_FREQS,
957
+ MODEL_TENSOR.ATTN_NORM,
958
+ MODEL_TENSOR.ATTN_Q,
959
+ MODEL_TENSOR.ATTN_Q_A,
960
+ MODEL_TENSOR.ATTN_Q_B,
961
+ MODEL_TENSOR.ATTN_KV_A_MQA,
962
+ MODEL_TENSOR.ATTN_KV_B,
963
+ MODEL_TENSOR.ATTN_Q_A_NORM,
964
+ MODEL_TENSOR.ATTN_KV_A_NORM,
965
+ MODEL_TENSOR.ATTN_OUT,
966
+ MODEL_TENSOR.ATTN_ROT_EMBD,
967
+ MODEL_TENSOR.FFN_GATE_INP,
968
+ MODEL_TENSOR.FFN_NORM,
969
+ MODEL_TENSOR.FFN_GATE,
970
+ MODEL_TENSOR.FFN_DOWN,
971
+ MODEL_TENSOR.FFN_UP,
972
+ MODEL_TENSOR.FFN_GATE_EXP,
973
+ MODEL_TENSOR.FFN_DOWN_EXP,
974
+ MODEL_TENSOR.FFN_UP_EXP,
975
+ MODEL_TENSOR.FFN_GATE_SHEXP,
976
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
977
+ MODEL_TENSOR.FFN_UP_SHEXP,
978
+ ],
979
+ MODEL_ARCH.CHATGLM : [
980
+ MODEL_TENSOR.TOKEN_EMBD,
981
+ MODEL_TENSOR.ROPE_FREQS,
982
+ MODEL_TENSOR.OUTPUT_NORM,
983
+ MODEL_TENSOR.OUTPUT,
984
+ MODEL_TENSOR.ATTN_NORM,
985
+ MODEL_TENSOR.ATTN_QKV,
986
+ MODEL_TENSOR.ATTN_OUT,
987
+ MODEL_TENSOR.FFN_NORM,
988
+ MODEL_TENSOR.FFN_DOWN,
989
+ MODEL_TENSOR.FFN_UP,
990
+ ],
991
+ MODEL_ARCH.BITNET: [
992
+ MODEL_TENSOR.ATTN_Q,
993
+ MODEL_TENSOR.ATTN_K,
994
+ MODEL_TENSOR.ATTN_V,
995
+ MODEL_TENSOR.TOKEN_EMBD,
996
+ MODEL_TENSOR.OUTPUT_NORM,
997
+ MODEL_TENSOR.ATTN_NORM,
998
+ MODEL_TENSOR.ATTN_OUT,
999
+ MODEL_TENSOR.FFN_NORM,
1000
+ MODEL_TENSOR.FFN_GATE,
1001
+ MODEL_TENSOR.FFN_DOWN,
1002
+ MODEL_TENSOR.FFN_UP,
1003
+ MODEL_TENSOR.ATTN_SUB_NORM,
1004
+ MODEL_TENSOR.FFN_SUB_NORM,
1005
+ ],
1006
+ MODEL_ARCH.T5: [
1007
+ MODEL_TENSOR.TOKEN_EMBD,
1008
+ MODEL_TENSOR.OUTPUT,
1009
+ MODEL_TENSOR.DEC_ATTN_NORM,
1010
+ MODEL_TENSOR.DEC_ATTN_Q,
1011
+ MODEL_TENSOR.DEC_ATTN_K,
1012
+ MODEL_TENSOR.DEC_ATTN_V,
1013
+ MODEL_TENSOR.DEC_ATTN_OUT,
1014
+ MODEL_TENSOR.DEC_ATTN_REL_B,
1015
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM,
1016
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q,
1017
+ MODEL_TENSOR.DEC_CROSS_ATTN_K,
1018
+ MODEL_TENSOR.DEC_CROSS_ATTN_V,
1019
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT,
1020
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B,
1021
+ MODEL_TENSOR.DEC_FFN_NORM,
1022
+ MODEL_TENSOR.DEC_FFN_GATE,
1023
+ MODEL_TENSOR.DEC_FFN_DOWN,
1024
+ MODEL_TENSOR.DEC_FFN_UP,
1025
+ MODEL_TENSOR.DEC_OUTPUT_NORM,
1026
+ MODEL_TENSOR.ENC_ATTN_NORM,
1027
+ MODEL_TENSOR.ENC_ATTN_Q,
1028
+ MODEL_TENSOR.ENC_ATTN_K,
1029
+ MODEL_TENSOR.ENC_ATTN_V,
1030
+ MODEL_TENSOR.ENC_ATTN_OUT,
1031
+ MODEL_TENSOR.ENC_ATTN_REL_B,
1032
+ MODEL_TENSOR.ENC_FFN_NORM,
1033
+ MODEL_TENSOR.ENC_FFN_GATE,
1034
+ MODEL_TENSOR.ENC_FFN_DOWN,
1035
+ MODEL_TENSOR.ENC_FFN_UP,
1036
+ MODEL_TENSOR.ENC_OUTPUT_NORM,
1037
+ ],
1038
+ MODEL_ARCH.JAIS: [
1039
+ MODEL_TENSOR.TOKEN_EMBD,
1040
+ MODEL_TENSOR.OUTPUT_NORM,
1041
+ MODEL_TENSOR.OUTPUT,
1042
+ MODEL_TENSOR.ATTN_NORM,
1043
+ MODEL_TENSOR.ATTN_QKV,
1044
+ MODEL_TENSOR.ATTN_OUT,
1045
+ MODEL_TENSOR.FFN_NORM,
1046
+ MODEL_TENSOR.FFN_DOWN,
1047
+ MODEL_TENSOR.FFN_GATE,
1048
+ MODEL_TENSOR.FFN_UP,
1049
+ ],
760
1050
  # TODO
761
1051
  }
762
1052
 
@@ -790,6 +1080,13 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
790
1080
  MODEL_TENSOR.ROPE_FREQS,
791
1081
  MODEL_TENSOR.ATTN_ROT_EMBD,
792
1082
  ],
1083
+ MODEL_ARCH.DEEPSEEK2: [
1084
+ MODEL_TENSOR.ROPE_FREQS,
1085
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1086
+ ],
1087
+ MODEL_ARCH.CHATGLM: [
1088
+ MODEL_TENSOR.ROPE_FREQS,
1089
+ ],
793
1090
  }
794
1091
 
795
1092
  #
@@ -977,7 +1274,6 @@ KEY_GENERAL_URL = Keys.General.URL
977
1274
  KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
978
1275
  KEY_GENERAL_LICENSE = Keys.General.LICENSE
979
1276
  KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
980
- KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
981
1277
  KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
982
1278
 
983
1279
  # LLM
@@ -1030,4 +1326,4 @@ KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
1030
1326
  KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
1031
1327
  KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
1032
1328
  KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
1033
- KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
1329
+ KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID