bigdl-core-cpp 2.1.0b20240820.post1__py3-none-win_amd64.whl → 2.2.0b20250217.post0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. bigdl/cpp/{convert-hf-to-gguf.py → convert_hf_to_gguf.py} +908 -140
  2. bigdl/cpp/convert_hf_to_gguf_update.py +376 -0
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +450 -0
  4. bigdl/cpp/convert_lora_to_gguf.py +433 -0
  5. bigdl/cpp/gguf-py/gguf/__init__.py +1 -1
  6. bigdl/cpp/gguf-py/gguf/constants.py +414 -89
  7. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  8. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  9. bigdl/cpp/gguf-py/gguf/gguf_writer.py +77 -14
  10. bigdl/cpp/gguf-py/gguf/lazy.py +3 -1
  11. bigdl/cpp/gguf-py/gguf/metadata.py +195 -76
  12. bigdl/cpp/gguf-py/gguf/quants.py +1210 -64
  13. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +156 -34
  14. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  15. bigdl/cpp/gguf-py/gguf/vocab.py +325 -3
  16. bigdl/cpp/libs/common.lib +0 -0
  17. bigdl/cpp/libs/ggml-base.dll +0 -0
  18. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  19. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  20. bigdl/cpp/libs/ggml.dll +0 -0
  21. bigdl/cpp/libs/libc++.dll +0 -0
  22. bigdl/cpp/libs/llama-batched.exe +0 -0
  23. bigdl/cpp/libs/llama-bench.exe +0 -0
  24. bigdl/cpp/libs/llama-cli.exe +0 -0
  25. bigdl/cpp/libs/llama-embedding.exe +0 -0
  26. bigdl/cpp/libs/llama-gguf.exe +0 -0
  27. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  28. bigdl/cpp/libs/llama-lookup.exe +0 -0
  29. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  30. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  31. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  32. bigdl/cpp/libs/llama-quantize.exe +0 -0
  33. bigdl/cpp/libs/llama-server.exe +0 -0
  34. bigdl/cpp/libs/llama-simple.exe +0 -0
  35. bigdl/cpp/libs/llama-speculative.exe +0 -0
  36. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  37. bigdl/cpp/libs/llama.dll +0 -0
  38. bigdl/cpp/libs/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  42. bigdl/cpp/libs/ollama-lib.exe +0 -0
  43. bigdl/cpp/libs/ollama.exe +0 -0
  44. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  45. bigdl/cpp/libs/ollama_llama.dll +0 -0
  46. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  47. {bigdl_core_cpp-2.1.0b20240820.post1.data → bigdl_core_cpp-2.2.0b20250217.post0.data}/scripts/init-llama-cpp.bat +7 -2
  48. bigdl_core_cpp-2.2.0b20250217.post0.data/scripts/init-ollama.bat +16 -0
  49. {bigdl_core_cpp-2.1.0b20240820.post1.dist-info → bigdl_core_cpp-2.2.0b20250217.post0.dist-info}/METADATA +9 -5
  50. bigdl_core_cpp-2.2.0b20250217.post0.dist-info/RECORD +56 -0
  51. {bigdl_core_cpp-2.1.0b20240820.post1.dist-info → bigdl_core_cpp-2.2.0b20250217.post0.dist-info}/WHEEL +1 -1
  52. bigdl/cpp/convert.py +0 -1714
  53. bigdl/cpp/libs/baby-llama.exe +0 -0
  54. bigdl/cpp/libs/batched-bench.exe +0 -0
  55. bigdl/cpp/libs/batched.exe +0 -0
  56. bigdl/cpp/libs/beam-search.exe +0 -0
  57. bigdl/cpp/libs/benchmark.exe +0 -0
  58. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  59. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  60. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  61. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  62. bigdl/cpp/libs/embedding.exe +0 -0
  63. bigdl/cpp/libs/export-lora.exe +0 -0
  64. bigdl/cpp/libs/finetune.exe +0 -0
  65. bigdl/cpp/libs/ggml_shared.dll +0 -0
  66. bigdl/cpp/libs/gguf.exe +0 -0
  67. bigdl/cpp/libs/gritlm.exe +0 -0
  68. bigdl/cpp/libs/imatrix.exe +0 -0
  69. bigdl/cpp/libs/infill.exe +0 -0
  70. bigdl/cpp/libs/llava-cli.exe +0 -0
  71. bigdl/cpp/libs/lookahead.exe +0 -0
  72. bigdl/cpp/libs/lookup.exe +0 -0
  73. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  74. bigdl/cpp/libs/main.exe +0 -0
  75. bigdl/cpp/libs/parallel.exe +0 -0
  76. bigdl/cpp/libs/passkey.exe +0 -0
  77. bigdl/cpp/libs/perplexity.exe +0 -0
  78. bigdl/cpp/libs/q8dot.exe +0 -0
  79. bigdl/cpp/libs/quantize-stats.exe +0 -0
  80. bigdl/cpp/libs/quantize.exe +0 -0
  81. bigdl/cpp/libs/save-load-state.exe +0 -0
  82. bigdl/cpp/libs/server.exe +0 -0
  83. bigdl/cpp/libs/simple.exe +0 -0
  84. bigdl/cpp/libs/speculative.exe +0 -0
  85. bigdl/cpp/libs/tokenize.exe +0 -0
  86. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  87. bigdl/cpp/libs/vdot.exe +0 -0
  88. bigdl_core_cpp-2.1.0b20240820.post1.data/scripts/init-ollama.bat +0 -13
  89. bigdl_core_cpp-2.1.0b20240820.post1.dist-info/RECORD +0 -63
  90. {bigdl_core_cpp-2.1.0b20240820.post1.data → bigdl_core_cpp-2.2.0b20250217.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
  91. {bigdl_core_cpp-2.1.0b20240820.post1.dist-info → bigdl_core_cpp-2.2.0b20250217.post0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/constants.py

@@ -64,15 +64,27 @@ class Keys:
         BASE_MODEL_AUTHOR       = "general.base_model.{id}.author"
         BASE_MODEL_VERSION      = "general.base_model.{id}.version"
         BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
+        BASE_MODEL_DESCRIPTION  = "general.base_model.{id}.description"
         BASE_MODEL_URL          = "general.base_model.{id}.url" # Model Website/Paper
         BASE_MODEL_DOI          = "general.base_model.{id}.doi"
         BASE_MODEL_UUID         = "general.base_model.{id}.uuid"
         BASE_MODEL_REPO_URL     = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
 
+        # Dataset Source
+        DATASET_COUNT           = "general.dataset.count"
+        DATASET_NAME            = "general.dataset.{id}.name"
+        DATASET_AUTHOR          = "general.dataset.{id}.author"
+        DATASET_VERSION         = "general.dataset.{id}.version"
+        DATASET_ORGANIZATION    = "general.dataset.{id}.organization"
+        DATASET_DESCRIPTION     = "general.dataset.{id}.description"
+        DATASET_URL             = "general.dataset.{id}.url" # Model Website/Paper
+        DATASET_DOI             = "general.dataset.{id}.doi"
+        DATASET_UUID            = "general.dataset.{id}.uuid"
+        DATASET_REPO_URL        = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)
+
         # Array based KV stores
         TAGS                    = "general.tags"
         LANGUAGES               = "general.languages"
-        DATASETS                = "general.datasets"
 
     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"
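The structured general.dataset.* keys above replace the old flat DATASETS array. As a rough illustration (not code from this package), a conversion script might emit them through gguf-py's GGUFWriter, filling the {id} placeholder per dataset; the output path, architecture, and dataset list below are hypothetical:

    # sketch: writing the new per-dataset metadata keys with gguf-py
    from gguf import GGUFWriter, Keys

    datasets = ["wikitext-103", "the-stack"]    # hypothetical values

    writer = GGUFWriter("model.gguf", "llama")  # hypothetical path/arch
    writer.add_uint32(Keys.General.DATASET_COUNT, len(datasets))
    for i, name in enumerate(datasets):
        # "general.dataset.{id}.name" -> "general.dataset.0.name", ...
        writer.add_string(Keys.General.DATASET_NAME.format(id=i), name)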
@@ -94,6 +106,12 @@ class Keys:
         DECODER_START_TOKEN_ID  = "{arch}.decoder_start_token_id"
         ATTN_LOGIT_SOFTCAPPING  = "{arch}.attn_logit_softcapping"
         FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
+        SWIN_NORM               = "{arch}.swin_norm"
+        RESCALE_EVERY_N_LAYERS  = "{arch}.rescale_every_n_layers"
+        TIME_MIX_EXTRA_DIM      = "{arch}.time_mix_extra_dim"
+        TIME_DECAY_EXTRA_DIM    = "{arch}.time_decay_extra_dim"
+        RESIDUAL_SCALE          = "{arch}.residual_scale"
+        EMBEDDING_SCALE         = "{arch}.embedding_scale"
 
     class Attention:
         HEAD_COUNT        = "{arch}.attention.head_count"
@@ -109,9 +127,11 @@ class Keys:
         KV_LORA_RANK      = "{arch}.attention.kv_lora_rank"
         REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
         SLIDING_WINDOW    = "{arch}.attention.sliding_window"
+        SCALE             = "{arch}.attention.scale"
 
     class Rope:
         DIMENSION_COUNT    = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE          = "{arch}.rope.freq_base"
         SCALING_TYPE       = "{arch}.rope.scaling.type"
         SCALING_FACTOR     = "{arch}.rope.scaling.factor"
@@ -130,6 +150,10 @@ class Keys:
         INNER_SIZE     = "{arch}.ssm.inner_size"
         STATE_SIZE     = "{arch}.ssm.state_size"
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+        DT_B_C_RMS     = "{arch}.ssm.dt_b_c_rms"
+
+    class WKV:
+        HEAD_SIZE = "{arch}.wkv.head_size"
 
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
@@ -141,6 +165,8 @@ class Keys:
         MERGES    = "tokenizer.ggml.merges"
         BOS_ID    = "tokenizer.ggml.bos_token_id"
         EOS_ID    = "tokenizer.ggml.eos_token_id"
+        EOT_ID    = "tokenizer.ggml.eot_token_id"
+        EOM_ID    = "tokenizer.ggml.eom_token_id"
         UNK_ID    = "tokenizer.ggml.unknown_token_id"
         SEP_ID    = "tokenizer.ggml.seperator_token_id"
         PAD_ID    = "tokenizer.ggml.padding_token_id"
@@ -157,10 +183,16 @@ class Keys:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES  = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # deprecated:
         PREFIX_ID  = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID  = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID  = "tokenizer.ggml.middle_token_id"
-        EOT_ID     = "tokenizer.ggml.eot_token_id"
 
     class Adapter:
         TYPE = "adapter.type"
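On the consuming side, the new FIM token ids are ordinary scalar KV fields. A minimal sketch of reading one back, assuming gguf-py's GGUFReader and its parts/data field layout as in gguf_reader.py (the model path is hypothetical):

    from gguf import GGUFReader, Keys

    reader = GGUFReader("model.gguf")  # hypothetical path
    field = reader.get_field(Keys.Tokenizer.FIM_PRE_ID)
    if field is not None:
        # for a scalar field, data[0] indexes the part holding the value
        fim_pre_id = int(field.parts[field.data[0]][0])
        print("FIM prefix token id:", fim_pre_id)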
@@ -195,6 +227,7 @@ class MODEL_ARCH(IntEnum):
     QWEN        = auto()
     QWEN2       = auto()
     QWEN2MOE    = auto()
+    QWEN2VL     = auto()
     PHI2        = auto()
     PHI3        = auto()
     PLAMO       = auto()
@@ -202,21 +235,31 @@ class MODEL_ARCH(IntEnum):
     ORION       = auto()
     INTERNLM2   = auto()
     MINICPM     = auto()
+    MINICPM3    = auto()
     GEMMA       = auto()
     GEMMA2      = auto()
     STARCODER2  = auto()
+    RWKV6       = auto()
     MAMBA       = auto()
     XVERSE      = auto()
     COMMAND_R   = auto()
     DBRX        = auto()
     OLMO        = auto()
+    OLMO2       = auto()
+    OLMOE       = auto()
     OPENELM     = auto()
     ARCTIC      = auto()
     DEEPSEEK2   = auto()
     CHATGLM     = auto()
     BITNET      = auto()
     T5          = auto()
+    T5ENCODER   = auto()
     JAIS        = auto()
+    NEMOTRON    = auto()
+    EXAONE      = auto()
+    GRANITE     = auto()
+    GRANITE_MOE = auto()
+    CHAMELEON   = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -265,6 +308,29 @@ class MODEL_TENSOR(IntEnum):
     SSM_A                  = auto()
     SSM_D                  = auto()
     SSM_OUT                = auto()
+    TIME_MIX_W1            = auto()
+    TIME_MIX_W2            = auto()
+    TIME_MIX_LERP_X        = auto()
+    TIME_MIX_LERP_K        = auto()
+    TIME_MIX_LERP_V        = auto()
+    TIME_MIX_LERP_R        = auto()
+    TIME_MIX_LERP_G        = auto()
+    TIME_MIX_LERP_W        = auto()
+    TIME_MIX_FIRST         = auto()
+    TIME_MIX_DECAY         = auto()
+    TIME_MIX_DECAY_W1      = auto()
+    TIME_MIX_DECAY_W2      = auto()
+    TIME_MIX_KEY           = auto()
+    TIME_MIX_VALUE         = auto()
+    TIME_MIX_RECEPTANCE    = auto()
+    TIME_MIX_GATE          = auto()
+    TIME_MIX_LN            = auto()
+    TIME_MIX_OUTPUT        = auto()
+    CHANNEL_MIX_LERP_K     = auto()
+    CHANNEL_MIX_LERP_R     = auto()
+    CHANNEL_MIX_KEY        = auto()
+    CHANNEL_MIX_RECEPTANCE = auto()
+    CHANNEL_MIX_VALUE      = auto()
     ATTN_Q_A               = auto()
     ATTN_Q_B               = auto()
     ATTN_KV_A_MQA          = auto()
@@ -301,6 +367,8 @@ class MODEL_TENSOR(IntEnum):
     ENC_FFN_DOWN           = auto()
     ENC_FFN_UP             = auto()
     ENC_OUTPUT_NORM        = auto()
+    CLS                    = auto() # classifier
+    CLS_OUT                = auto() # classifier output projection
 
 
 MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
@@ -322,6 +390,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.QWEN:        "qwen",
     MODEL_ARCH.QWEN2:       "qwen2",
     MODEL_ARCH.QWEN2MOE:    "qwen2moe",
+    MODEL_ARCH.QWEN2VL:     "qwen2vl",
     MODEL_ARCH.PHI2:        "phi2",
     MODEL_ARCH.PHI3:        "phi3",
     MODEL_ARCH.PLAMO:       "plamo",
@@ -329,105 +398,140 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.ORION:       "orion",
     MODEL_ARCH.INTERNLM2:   "internlm2",
     MODEL_ARCH.MINICPM:     "minicpm",
+    MODEL_ARCH.MINICPM3:    "minicpm3",
     MODEL_ARCH.GEMMA:       "gemma",
     MODEL_ARCH.GEMMA2:      "gemma2",
     MODEL_ARCH.STARCODER2:  "starcoder2",
+    MODEL_ARCH.RWKV6:       "rwkv6",
     MODEL_ARCH.MAMBA:       "mamba",
     MODEL_ARCH.XVERSE:      "xverse",
     MODEL_ARCH.COMMAND_R:   "command-r",
     MODEL_ARCH.DBRX:        "dbrx",
     MODEL_ARCH.OLMO:        "olmo",
+    MODEL_ARCH.OLMO2:       "olmo2",
+    MODEL_ARCH.OLMOE:       "olmoe",
     MODEL_ARCH.OPENELM:     "openelm",
     MODEL_ARCH.ARCTIC:      "arctic",
     MODEL_ARCH.DEEPSEEK2:   "deepseek2",
     MODEL_ARCH.CHATGLM:     "chatglm",
     MODEL_ARCH.BITNET:      "bitnet",
     MODEL_ARCH.T5:          "t5",
+    MODEL_ARCH.T5ENCODER:   "t5encoder",
     MODEL_ARCH.JAIS:        "jais",
+    MODEL_ARCH.NEMOTRON:    "nemotron",
+    MODEL_ARCH.EXAONE:      "exaone",
+    MODEL_ARCH.GRANITE:     "granite",
+    MODEL_ARCH.GRANITE_MOE: "granitemoe",
+    MODEL_ARCH.CHAMELEON:   "chameleon",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
-    MODEL_TENSOR.TOKEN_EMBD:           "token_embd",
-    MODEL_TENSOR.TOKEN_EMBD_NORM:      "token_embd_norm",
-    MODEL_TENSOR.TOKEN_TYPES:          "token_types",
-    MODEL_TENSOR.POS_EMBD:             "position_embd",
-    MODEL_TENSOR.OUTPUT_NORM:          "output_norm",
-    MODEL_TENSOR.OUTPUT:               "output",
-    MODEL_TENSOR.ROPE_FREQS:           "rope_freqs",
-    MODEL_TENSOR.ROPE_FACTORS_LONG:    "rope_factors_long",
-    MODEL_TENSOR.ROPE_FACTORS_SHORT:   "rope_factors_short",
-    MODEL_TENSOR.ATTN_NORM:            "blk.{bid}.attn_norm",
-    MODEL_TENSOR.ATTN_NORM_2:          "blk.{bid}.attn_norm_2",
-    MODEL_TENSOR.ATTN_QKV:             "blk.{bid}.attn_qkv",
-    MODEL_TENSOR.ATTN_Q:               "blk.{bid}.attn_q",
-    MODEL_TENSOR.ATTN_K:               "blk.{bid}.attn_k",
-    MODEL_TENSOR.ATTN_V:               "blk.{bid}.attn_v",
-    MODEL_TENSOR.ATTN_OUT:             "blk.{bid}.attn_output",
-    MODEL_TENSOR.ATTN_ROT_EMBD:        "blk.{bid}.attn_rot_embd",
-    MODEL_TENSOR.ATTN_Q_NORM:          "blk.{bid}.attn_q_norm",
-    MODEL_TENSOR.ATTN_K_NORM:          "blk.{bid}.attn_k_norm",
-    MODEL_TENSOR.ATTN_OUT_NORM:        "blk.{bid}.attn_output_norm",
-    MODEL_TENSOR.ATTN_POST_NORM:       "blk.{bid}.post_attention_norm",
-    MODEL_TENSOR.FFN_GATE_INP:         "blk.{bid}.ffn_gate_inp",
-    MODEL_TENSOR.FFN_GATE_INP_SHEXP:   "blk.{bid}.ffn_gate_inp_shexp",
-    MODEL_TENSOR.FFN_NORM:             "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_PRE_NORM:         "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_POST_NORM:        "blk.{bid}.post_ffw_norm",
-    MODEL_TENSOR.FFN_GATE:             "blk.{bid}.ffn_gate",
-    MODEL_TENSOR.FFN_DOWN:             "blk.{bid}.ffn_down",
-    MODEL_TENSOR.FFN_UP:               "blk.{bid}.ffn_up",
-    MODEL_TENSOR.FFN_GATE_SHEXP:       "blk.{bid}.ffn_gate_shexp",
-    MODEL_TENSOR.FFN_DOWN_SHEXP:       "blk.{bid}.ffn_down_shexp",
-    MODEL_TENSOR.FFN_UP_SHEXP:         "blk.{bid}.ffn_up_shexp",
-    MODEL_TENSOR.FFN_ACT:              "blk.{bid}.ffn",
-    MODEL_TENSOR.FFN_NORM_EXP:         "blk.{bid}.ffn_norm_exps",
-    MODEL_TENSOR.FFN_GATE_EXP:         "blk.{bid}.ffn_gate_exps",
-    MODEL_TENSOR.FFN_DOWN_EXP:         "blk.{bid}.ffn_down_exps",
-    MODEL_TENSOR.FFN_UP_EXP:           "blk.{bid}.ffn_up_exps",
-    MODEL_TENSOR.LAYER_OUT_NORM:       "blk.{bid}.layer_output_norm",
-    MODEL_TENSOR.SSM_IN:               "blk.{bid}.ssm_in",
-    MODEL_TENSOR.SSM_CONV1D:           "blk.{bid}.ssm_conv1d",
-    MODEL_TENSOR.SSM_X:                "blk.{bid}.ssm_x",
-    MODEL_TENSOR.SSM_DT:               "blk.{bid}.ssm_dt",
-    MODEL_TENSOR.SSM_A:                "blk.{bid}.ssm_a",
-    MODEL_TENSOR.SSM_D:                "blk.{bid}.ssm_d",
-    MODEL_TENSOR.SSM_OUT:              "blk.{bid}.ssm_out",
-    MODEL_TENSOR.ATTN_Q_A:             "blk.{bid}.attn_q_a",
-    MODEL_TENSOR.ATTN_Q_B:             "blk.{bid}.attn_q_b",
-    MODEL_TENSOR.ATTN_KV_A_MQA:        "blk.{bid}.attn_kv_a_mqa",
-    MODEL_TENSOR.ATTN_KV_B:            "blk.{bid}.attn_kv_b",
-    MODEL_TENSOR.ATTN_Q_A_NORM:        "blk.{bid}.attn_q_a_norm",
-    MODEL_TENSOR.ATTN_KV_A_NORM:       "blk.{bid}.attn_kv_a_norm",
-    MODEL_TENSOR.ATTN_SUB_NORM:        "blk.{bid}.attn_sub_norm",
-    MODEL_TENSOR.FFN_SUB_NORM:         "blk.{bid}.ffn_sub_norm",
-    MODEL_TENSOR.DEC_ATTN_NORM:        "dec.blk.{bid}.attn_norm",
-    MODEL_TENSOR.DEC_ATTN_Q:           "dec.blk.{bid}.attn_q",
-    MODEL_TENSOR.DEC_ATTN_K:           "dec.blk.{bid}.attn_k",
-    MODEL_TENSOR.DEC_ATTN_V:           "dec.blk.{bid}.attn_v",
-    MODEL_TENSOR.DEC_ATTN_OUT:         "dec.blk.{bid}.attn_o",
-    MODEL_TENSOR.DEC_ATTN_REL_B:       "dec.blk.{bid}.attn_rel_b",
-    MODEL_TENSOR.DEC_CROSS_ATTN_NORM:  "dec.blk.{bid}.cross_attn_norm",
-    MODEL_TENSOR.DEC_CROSS_ATTN_Q:     "dec.blk.{bid}.cross_attn_q",
-    MODEL_TENSOR.DEC_CROSS_ATTN_K:     "dec.blk.{bid}.cross_attn_k",
-    MODEL_TENSOR.DEC_CROSS_ATTN_V:     "dec.blk.{bid}.cross_attn_v",
-    MODEL_TENSOR.DEC_CROSS_ATTN_OUT:   "dec.blk.{bid}.cross_attn_o",
-    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
-    MODEL_TENSOR.DEC_FFN_NORM:         "dec.blk.{bid}.ffn_norm",
-    MODEL_TENSOR.DEC_FFN_GATE:         "dec.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.DEC_FFN_DOWN:         "dec.blk.{bid}.ffn_down",
-    MODEL_TENSOR.DEC_FFN_UP:           "dec.blk.{bid}.ffn_up",
-    MODEL_TENSOR.DEC_OUTPUT_NORM:      "dec.output_norm",
-    MODEL_TENSOR.ENC_ATTN_NORM:        "enc.blk.{bid}.attn_norm",
-    MODEL_TENSOR.ENC_ATTN_Q:           "enc.blk.{bid}.attn_q",
-    MODEL_TENSOR.ENC_ATTN_K:           "enc.blk.{bid}.attn_k",
-    MODEL_TENSOR.ENC_ATTN_V:           "enc.blk.{bid}.attn_v",
-    MODEL_TENSOR.ENC_ATTN_OUT:         "enc.blk.{bid}.attn_o",
-    MODEL_TENSOR.ENC_ATTN_REL_B:       "enc.blk.{bid}.attn_rel_b",
-    MODEL_TENSOR.ENC_FFN_NORM:         "enc.blk.{bid}.ffn_norm",
-    MODEL_TENSOR.ENC_FFN_GATE:         "enc.blk.{bid}.ffn_gate",
-    MODEL_TENSOR.ENC_FFN_DOWN:         "enc.blk.{bid}.ffn_down",
-    MODEL_TENSOR.ENC_FFN_UP:           "enc.blk.{bid}.ffn_up",
-    MODEL_TENSOR.ENC_OUTPUT_NORM:      "enc.output_norm",
+    MODEL_TENSOR.TOKEN_EMBD:             "token_embd",
+    MODEL_TENSOR.TOKEN_EMBD_NORM:        "token_embd_norm",
+    MODEL_TENSOR.TOKEN_TYPES:            "token_types",
+    MODEL_TENSOR.POS_EMBD:               "position_embd",
+    MODEL_TENSOR.OUTPUT_NORM:            "output_norm",
+    MODEL_TENSOR.OUTPUT:                 "output",
+    MODEL_TENSOR.ROPE_FREQS:             "rope_freqs",
+    MODEL_TENSOR.ROPE_FACTORS_LONG:      "rope_factors_long",
+    MODEL_TENSOR.ROPE_FACTORS_SHORT:     "rope_factors_short",
+    MODEL_TENSOR.ATTN_NORM:              "blk.{bid}.attn_norm",
+    MODEL_TENSOR.ATTN_NORM_2:            "blk.{bid}.attn_norm_2",
+    MODEL_TENSOR.ATTN_QKV:               "blk.{bid}.attn_qkv",
+    MODEL_TENSOR.ATTN_Q:                 "blk.{bid}.attn_q",
+    MODEL_TENSOR.ATTN_K:                 "blk.{bid}.attn_k",
+    MODEL_TENSOR.ATTN_V:                 "blk.{bid}.attn_v",
+    MODEL_TENSOR.ATTN_OUT:               "blk.{bid}.attn_output",
+    MODEL_TENSOR.ATTN_ROT_EMBD:          "blk.{bid}.attn_rot_embd",
+    MODEL_TENSOR.ATTN_Q_NORM:            "blk.{bid}.attn_q_norm",
+    MODEL_TENSOR.ATTN_K_NORM:            "blk.{bid}.attn_k_norm",
+    MODEL_TENSOR.ATTN_OUT_NORM:          "blk.{bid}.attn_output_norm",
+    MODEL_TENSOR.ATTN_POST_NORM:         "blk.{bid}.post_attention_norm",
+    MODEL_TENSOR.FFN_GATE_INP:           "blk.{bid}.ffn_gate_inp",
+    MODEL_TENSOR.FFN_GATE_INP_SHEXP:     "blk.{bid}.ffn_gate_inp_shexp",
+    MODEL_TENSOR.FFN_NORM:               "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_PRE_NORM:           "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_POST_NORM:          "blk.{bid}.post_ffw_norm",
+    MODEL_TENSOR.FFN_GATE:               "blk.{bid}.ffn_gate",
+    MODEL_TENSOR.FFN_DOWN:               "blk.{bid}.ffn_down",
+    MODEL_TENSOR.FFN_UP:                 "blk.{bid}.ffn_up",
+    MODEL_TENSOR.FFN_GATE_SHEXP:         "blk.{bid}.ffn_gate_shexp",
+    MODEL_TENSOR.FFN_DOWN_SHEXP:         "blk.{bid}.ffn_down_shexp",
+    MODEL_TENSOR.FFN_UP_SHEXP:           "blk.{bid}.ffn_up_shexp",
+    MODEL_TENSOR.FFN_ACT:                "blk.{bid}.ffn",
+    MODEL_TENSOR.FFN_NORM_EXP:           "blk.{bid}.ffn_norm_exps",
+    MODEL_TENSOR.FFN_GATE_EXP:           "blk.{bid}.ffn_gate_exps",
+    MODEL_TENSOR.FFN_DOWN_EXP:           "blk.{bid}.ffn_down_exps",
+    MODEL_TENSOR.FFN_UP_EXP:             "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.LAYER_OUT_NORM:         "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.SSM_IN:                 "blk.{bid}.ssm_in",
+    MODEL_TENSOR.SSM_CONV1D:             "blk.{bid}.ssm_conv1d",
+    MODEL_TENSOR.SSM_X:                  "blk.{bid}.ssm_x",
+    MODEL_TENSOR.SSM_DT:                 "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_A:                  "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_D:                  "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_OUT:                "blk.{bid}.ssm_out",
+    MODEL_TENSOR.TIME_MIX_W1:            "blk.{bid}.time_mix_w1",
+    MODEL_TENSOR.TIME_MIX_W2:            "blk.{bid}.time_mix_w2",
+    MODEL_TENSOR.TIME_MIX_LERP_X:        "blk.{bid}.time_mix_lerp_x",
+    MODEL_TENSOR.TIME_MIX_LERP_K:        "blk.{bid}.time_mix_lerp_k",
+    MODEL_TENSOR.TIME_MIX_LERP_V:        "blk.{bid}.time_mix_lerp_v",
+    MODEL_TENSOR.TIME_MIX_LERP_R:        "blk.{bid}.time_mix_lerp_r",
+    MODEL_TENSOR.TIME_MIX_LERP_G:        "blk.{bid}.time_mix_lerp_g",
+    MODEL_TENSOR.TIME_MIX_LERP_W:        "blk.{bid}.time_mix_lerp_w",
+    MODEL_TENSOR.TIME_MIX_FIRST:         "blk.{bid}.time_mix_first",
+    MODEL_TENSOR.TIME_MIX_DECAY:         "blk.{bid}.time_mix_decay",
+    MODEL_TENSOR.TIME_MIX_DECAY_W1:      "blk.{bid}.time_mix_decay_w1",
+    MODEL_TENSOR.TIME_MIX_DECAY_W2:      "blk.{bid}.time_mix_decay_w2",
+    MODEL_TENSOR.TIME_MIX_KEY:           "blk.{bid}.time_mix_key",
+    MODEL_TENSOR.TIME_MIX_VALUE:         "blk.{bid}.time_mix_value",
+    MODEL_TENSOR.TIME_MIX_RECEPTANCE:    "blk.{bid}.time_mix_receptance",
+    MODEL_TENSOR.TIME_MIX_GATE:          "blk.{bid}.time_mix_gate",
+    MODEL_TENSOR.TIME_MIX_LN:            "blk.{bid}.time_mix_ln",
+    MODEL_TENSOR.TIME_MIX_OUTPUT:        "blk.{bid}.time_mix_output",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_K:     "blk.{bid}.channel_mix_lerp_k",
+    MODEL_TENSOR.CHANNEL_MIX_LERP_R:     "blk.{bid}.channel_mix_lerp_r",
+    MODEL_TENSOR.CHANNEL_MIX_KEY:        "blk.{bid}.channel_mix_key",
+    MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE: "blk.{bid}.channel_mix_receptance",
+    MODEL_TENSOR.CHANNEL_MIX_VALUE:      "blk.{bid}.channel_mix_value",
+    MODEL_TENSOR.ATTN_Q_A:               "blk.{bid}.attn_q_a",
+    MODEL_TENSOR.ATTN_Q_B:               "blk.{bid}.attn_q_b",
+    MODEL_TENSOR.ATTN_KV_A_MQA:          "blk.{bid}.attn_kv_a_mqa",
+    MODEL_TENSOR.ATTN_KV_B:              "blk.{bid}.attn_kv_b",
+    MODEL_TENSOR.ATTN_Q_A_NORM:          "blk.{bid}.attn_q_a_norm",
+    MODEL_TENSOR.ATTN_KV_A_NORM:         "blk.{bid}.attn_kv_a_norm",
+    MODEL_TENSOR.ATTN_SUB_NORM:          "blk.{bid}.attn_sub_norm",
+    MODEL_TENSOR.FFN_SUB_NORM:           "blk.{bid}.ffn_sub_norm",
+    MODEL_TENSOR.DEC_ATTN_NORM:          "dec.blk.{bid}.attn_norm",
+    MODEL_TENSOR.DEC_ATTN_Q:             "dec.blk.{bid}.attn_q",
+    MODEL_TENSOR.DEC_ATTN_K:             "dec.blk.{bid}.attn_k",
+    MODEL_TENSOR.DEC_ATTN_V:             "dec.blk.{bid}.attn_v",
+    MODEL_TENSOR.DEC_ATTN_OUT:           "dec.blk.{bid}.attn_o",
+    MODEL_TENSOR.DEC_ATTN_REL_B:         "dec.blk.{bid}.attn_rel_b",
+    MODEL_TENSOR.DEC_CROSS_ATTN_NORM:    "dec.blk.{bid}.cross_attn_norm",
+    MODEL_TENSOR.DEC_CROSS_ATTN_Q:       "dec.blk.{bid}.cross_attn_q",
+    MODEL_TENSOR.DEC_CROSS_ATTN_K:       "dec.blk.{bid}.cross_attn_k",
+    MODEL_TENSOR.DEC_CROSS_ATTN_V:       "dec.blk.{bid}.cross_attn_v",
+    MODEL_TENSOR.DEC_CROSS_ATTN_OUT:     "dec.blk.{bid}.cross_attn_o",
+    MODEL_TENSOR.DEC_CROSS_ATTN_REL_B:   "dec.blk.{bid}.cross_attn_rel_b",
+    MODEL_TENSOR.DEC_FFN_NORM:           "dec.blk.{bid}.ffn_norm",
+    MODEL_TENSOR.DEC_FFN_GATE:           "dec.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.DEC_FFN_DOWN:           "dec.blk.{bid}.ffn_down",
+    MODEL_TENSOR.DEC_FFN_UP:             "dec.blk.{bid}.ffn_up",
+    MODEL_TENSOR.DEC_OUTPUT_NORM:        "dec.output_norm",
+    MODEL_TENSOR.ENC_ATTN_NORM:          "enc.blk.{bid}.attn_norm",
+    MODEL_TENSOR.ENC_ATTN_Q:             "enc.blk.{bid}.attn_q",
+    MODEL_TENSOR.ENC_ATTN_K:             "enc.blk.{bid}.attn_k",
+    MODEL_TENSOR.ENC_ATTN_V:             "enc.blk.{bid}.attn_v",
+    MODEL_TENSOR.ENC_ATTN_OUT:           "enc.blk.{bid}.attn_o",
+    MODEL_TENSOR.ENC_ATTN_REL_B:         "enc.blk.{bid}.attn_rel_b",
+    MODEL_TENSOR.ENC_FFN_NORM:           "enc.blk.{bid}.ffn_norm",
+    MODEL_TENSOR.ENC_FFN_GATE:           "enc.blk.{bid}.ffn_gate",
+    MODEL_TENSOR.ENC_FFN_DOWN:           "enc.blk.{bid}.ffn_down",
+    MODEL_TENSOR.ENC_FFN_UP:             "enc.blk.{bid}.ffn_up",
+    MODEL_TENSOR.ENC_OUTPUT_NORM:        "enc.output_norm",
+    MODEL_TENSOR.CLS:                    "cls",
+    MODEL_TENSOR.CLS_OUT:                "cls.output",
 }
 
 MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
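TENSOR_NAMES maps each enum member to a serialized-name template whose {bid} placeholder is the block index, and MODEL_TENSORS (below) lists which tensors an architecture may contain. A small illustration, not code from this diff, of how the new rwkv6 entries expand:

    from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

    # expected serialized names for block 0 of an rwkv6 model
    for t in MODEL_TENSORS[MODEL_ARCH.RWKV6][:8]:
        print(TENSOR_NAMES[t].format(bid=0))
    # token_embd, token_embd_norm, output_norm, output,
    # blk.0.attn_norm, blk.0.attn_norm_2, blk.0.time_mix_w1, blk.0.time_mix_w2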
@@ -537,6 +641,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
+        MODEL_TENSOR.CLS_OUT,
     ],
     MODEL_ARCH.NOMIC_BERT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -568,6 +674,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.LAYER_OUT_NORM,
+        MODEL_TENSOR.CLS,
     ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
@@ -654,6 +761,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2VL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
@@ -731,6 +853,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,
@@ -790,6 +914,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
805
931
  MODEL_TENSOR.FFN_DOWN_EXP,
806
932
  MODEL_TENSOR.FFN_UP_EXP,
807
933
  ],
934
+ MODEL_ARCH.MINICPM3: [
935
+ MODEL_TENSOR.TOKEN_EMBD,
936
+ MODEL_TENSOR.OUTPUT_NORM,
937
+ MODEL_TENSOR.OUTPUT,
938
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
939
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
940
+ MODEL_TENSOR.ATTN_NORM,
941
+ MODEL_TENSOR.ATTN_Q_A,
942
+ MODEL_TENSOR.ATTN_Q_B,
943
+ MODEL_TENSOR.ATTN_KV_A_MQA,
944
+ MODEL_TENSOR.ATTN_KV_B,
945
+ MODEL_TENSOR.ATTN_Q_A_NORM,
946
+ MODEL_TENSOR.ATTN_KV_A_NORM,
947
+ MODEL_TENSOR.ATTN_OUT,
948
+ MODEL_TENSOR.FFN_NORM,
949
+ MODEL_TENSOR.FFN_GATE,
950
+ MODEL_TENSOR.FFN_DOWN,
951
+ MODEL_TENSOR.FFN_UP,
952
+ ],
808
953
  MODEL_ARCH.GEMMA: [
809
954
  MODEL_TENSOR.TOKEN_EMBD,
810
955
  MODEL_TENSOR.OUTPUT_NORM,
@@ -848,6 +993,37 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.RWKV6: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.TIME_MIX_W1,
+        MODEL_TENSOR.TIME_MIX_W2,
+        MODEL_TENSOR.TIME_MIX_LERP_X,
+        MODEL_TENSOR.TIME_MIX_LERP_K,
+        MODEL_TENSOR.TIME_MIX_LERP_V,
+        MODEL_TENSOR.TIME_MIX_LERP_R,
+        MODEL_TENSOR.TIME_MIX_LERP_G,
+        MODEL_TENSOR.TIME_MIX_LERP_W,
+        MODEL_TENSOR.TIME_MIX_FIRST,
+        MODEL_TENSOR.TIME_MIX_DECAY,
+        MODEL_TENSOR.TIME_MIX_DECAY_W1,
+        MODEL_TENSOR.TIME_MIX_DECAY_W2,
+        MODEL_TENSOR.TIME_MIX_KEY,
+        MODEL_TENSOR.TIME_MIX_VALUE,
+        MODEL_TENSOR.TIME_MIX_RECEPTANCE,
+        MODEL_TENSOR.TIME_MIX_GATE,
+        MODEL_TENSOR.TIME_MIX_LN,
+        MODEL_TENSOR.TIME_MIX_OUTPUT,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_K,
+        MODEL_TENSOR.CHANNEL_MIX_LERP_R,
+        MODEL_TENSOR.CHANNEL_MIX_KEY,
+        MODEL_TENSOR.CHANNEL_MIX_RECEPTANCE,
+        MODEL_TENSOR.CHANNEL_MIX_VALUE,
+    ],
     MODEL_ARCH.MAMBA: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -915,6 +1091,39 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.OLMO2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.OLMOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+    ],
     MODEL_ARCH.OPENELM: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1035,6 +1244,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.ENC_FFN_UP,
         MODEL_TENSOR.ENC_OUTPUT_NORM,
     ],
+    MODEL_ARCH.T5ENCODER: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ENC_ATTN_NORM,
+        MODEL_TENSOR.ENC_ATTN_Q,
+        MODEL_TENSOR.ENC_ATTN_K,
+        MODEL_TENSOR.ENC_ATTN_V,
+        MODEL_TENSOR.ENC_ATTN_OUT,
+        MODEL_TENSOR.ENC_ATTN_REL_B,
+        MODEL_TENSOR.ENC_FFN_NORM,
+        MODEL_TENSOR.ENC_FFN_GATE,
+        MODEL_TENSOR.ENC_FFN_DOWN,
+        MODEL_TENSOR.ENC_FFN_UP,
+        MODEL_TENSOR.ENC_OUTPUT_NORM,
+    ],
     MODEL_ARCH.JAIS: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1047,6 +1271,82 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.NEMOTRON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.EXAONE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GRANITE_MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.CHAMELEON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     # TODO
 }
 
@@ -1087,6 +1387,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     MODEL_ARCH.CHATGLM: [
         MODEL_TENSOR.ROPE_FREQS,
     ],
+    MODEL_ARCH.NEMOTRON: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
 }
 
 #
@@ -1104,9 +1408,10 @@ class TokenType(IntEnum):
 
 
 class RopeScalingType(Enum):
-    NONE = 'none'
-    LINEAR = 'linear'
-    YARN = 'yarn'
+    NONE     = 'none'
+    LINEAR   = 'linear'
+    YARN     = 'yarn'
+    LONGROPE = 'longrope'
 
 
 class PoolingType(IntEnum):
@@ -1145,6 +1450,8 @@ class GGMLQuantizationType(IntEnum):
     F64     = 28
     IQ1_M   = 29
     BF16    = 30
+    TQ1_0   = 34
+    TQ2_0   = 35
 
 
 # TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -1157,7 +1464,7 @@ class LlamaFileType(IntEnum):
     MOSTLY_F16           = 1   # except 1d tensors
     MOSTLY_Q4_0          = 2   # except 1d tensors
     MOSTLY_Q4_1          = 3   # except 1d tensors
-    MOSTLY_Q4_1_SOME_F16 = 4   # tok_embeddings.weight and output.weight are F16
+    # MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
     # MOSTLY_Q4_2 = 5          # support has been removed
     # MOSTLY_Q4_3 = 6          # support has been removed
     MOSTLY_Q8_0          = 7   # except 1d tensors
@@ -1186,6 +1493,11 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS        = 30  # except 1d tensors
     MOSTLY_IQ1_M         = 31  # except 1d tensors
     MOSTLY_BF16          = 32  # except 1d tensors
+    # MOSTLY_Q4_0_4_4 = 33     # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34     # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35     # removed from gguf files, use Q4_0 and runtime repack
+    MOSTLY_TQ1_0         = 36  # except 1d tensors
+    MOSTLY_TQ2_0         = 37  # except 1d tensors
 
     GUESSED              = 1024  # not specified in the model file
 
@@ -1259,6 +1571,8 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
     GGMLQuantizationType.F64:   (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
     GGMLQuantizationType.BF16:  (1, 2),
+    GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
+    GGMLQuantizationType.TQ2_0: (256, 2 + 64),
 }
 
 
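Each GGML_QUANT_SIZES entry is a (block_size, type_size) pair: elements per block and bytes per block. That makes the effective bit-width of the new ternary types a one-line computation:

    # TQ1_0: 2 + 4*13 = 54 bytes per 256 elements -> 54*8/256 = 1.6875 bits/weight
    # TQ2_0: 2 + 64   = 66 bytes per 256 elements -> 66*8/256 = 2.0625 bits/weight
    for block_size, type_size in [(256, 2 + 4 * 13), (256, 2 + 64)]:
        print(type_size * 8 / block_size)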
@@ -1306,6 +1620,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
 KEY_SSM_INNER_SIZE     = Keys.SSM.INNER_SIZE
 KEY_SSM_STATE_SIZE     = Keys.SSM.STATE_SIZE
 KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+KEY_SSM_DT_B_C_RMS     = Keys.SSM.DT_B_C_RMS
 
 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
@@ -1316,6 +1631,8 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
@@ -1323,7 +1640,15 @@ KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV    = Keys.Tokenizer.RWKV
-KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID    = Keys.Tokenizer.EOT_ID