bigdl-core-cpp 2.5.0b20240420__py3-none-win_amd64.whl → 2.5.0b20240422__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. bigdl/cpp/convert.py +276 -189
  2. bigdl/cpp/gguf-py/__init__.py +0 -0
  3. bigdl/cpp/gguf-py/gguf/__init__.py +5 -0
  4. bigdl/cpp/gguf-py/gguf/constants.py +943 -0
  5. bigdl/cpp/gguf-py/gguf/gguf.py +15 -0
  6. bigdl/cpp/gguf-py/gguf/gguf_reader.py +279 -0
  7. bigdl/cpp/gguf-py/gguf/gguf_writer.py +518 -0
  8. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +434 -0
  9. bigdl/cpp/gguf-py/gguf/vocab.py +181 -0
  10. bigdl/cpp/libs/baby-llama.exe +0 -0
  11. bigdl/cpp/libs/batched-bench.exe +0 -0
  12. bigdl/cpp/libs/batched.exe +0 -0
  13. bigdl/cpp/libs/beam-search.exe +0 -0
  14. bigdl/cpp/libs/benchmark.exe +0 -0
  15. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  16. bigdl/cpp/libs/embedding.exe +0 -0
  17. bigdl/cpp/libs/export-lora.exe +0 -0
  18. bigdl/cpp/libs/finetune.exe +0 -0
  19. bigdl/cpp/libs/gguf.exe +0 -0
  20. bigdl/cpp/libs/gritlm.exe +0 -0
  21. bigdl/cpp/libs/imatrix.exe +0 -0
  22. bigdl/cpp/libs/infill.exe +0 -0
  23. bigdl/cpp/libs/llama-bench.exe +0 -0
  24. bigdl/cpp/libs/llava-cli.exe +0 -0
  25. bigdl/cpp/libs/lookahead.exe +0 -0
  26. bigdl/cpp/libs/lookup.exe +0 -0
  27. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  28. bigdl/cpp/libs/main.exe +0 -0
  29. bigdl/cpp/libs/ollama.exe +0 -0
  30. bigdl/cpp/libs/parallel.exe +0 -0
  31. bigdl/cpp/libs/passkey.exe +0 -0
  32. bigdl/cpp/libs/perplexity.exe +0 -0
  33. bigdl/cpp/libs/q8dot.exe +0 -0
  34. bigdl/cpp/libs/quantize-stats.exe +0 -0
  35. bigdl/cpp/libs/quantize.exe +0 -0
  36. bigdl/cpp/libs/save-load-state.exe +0 -0
  37. bigdl/cpp/libs/server.exe +0 -0
  38. bigdl/cpp/libs/simple.exe +0 -0
  39. bigdl/cpp/libs/speculative.exe +0 -0
  40. bigdl/cpp/libs/tokenize.exe +0 -0
  41. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  42. bigdl/cpp/libs/vdot.exe +0 -0
  43. {bigdl_core_cpp-2.5.0b20240420.data → bigdl_core_cpp-2.5.0b20240422.data}/scripts/init-llama-cpp.bat +1 -0
  44. {bigdl_core_cpp-2.5.0b20240420.dist-info → bigdl_core_cpp-2.5.0b20240422.dist-info}/METADATA +3 -3
  45. bigdl_core_cpp-2.5.0b20240422.dist-info/RECORD +50 -0
  46. bigdl_core_cpp-2.5.0b20240420.dist-info/RECORD +0 -42
  47. {bigdl_core_cpp-2.5.0b20240420.data → bigdl_core_cpp-2.5.0b20240422.data}/scripts/init-llama-cpp.ps1 +0 -0
  48. {bigdl_core_cpp-2.5.0b20240420.data → bigdl_core_cpp-2.5.0b20240422.data}/scripts/init-ollama.bat +0 -0
  49. {bigdl_core_cpp-2.5.0b20240420.dist-info → bigdl_core_cpp-2.5.0b20240422.dist-info}/WHEEL +0 -0
  50. {bigdl_core_cpp-2.5.0b20240420.dist-info → bigdl_core_cpp-2.5.0b20240422.dist-info}/top_level.txt +0 -0
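The bulk of the new code is the vendored gguf-py package (items 2–9); its constants.py, shown below, defines the GGUF metadata keys, model architectures, tensor-name templates, and quantization tables. As a rough orientation — a minimal sketch, not part of the wheel, assuming the vendored package is importable as `gguf` — the keys and tensor names are string templates filled in per architecture and per block:

# Minimal sketch (assumption: the vendored gguf-py package is on the
# import path as `gguf`; these names all appear in constants.py below).
from gguf.constants import (
    MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSOR, TENSOR_NAMES, Keys,
)

arch = MODEL_ARCH_NAMES[MODEL_ARCH.LLAMA]                 # "llama"
# Per-architecture metadata keys are templates parameterized by {arch}:
ctx_key = Keys.LLM.CONTEXT_LENGTH.format(arch=arch)       # "llama.context_length"
# Per-block tensor names are templates parameterized by {bid}:
q_name = TENSOR_NAMES[MODEL_TENSOR.ATTN_Q].format(bid=0)  # "blk.0.attn_q"
print(ctx_key, q_name)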
bigdl/cpp/gguf-py/gguf/constants.py (new file)
@@ -0,0 +1,943 @@
+from __future__ import annotations
+
+import sys
+from enum import Enum, IntEnum, auto
+from typing import Any
+
+#
+# constants
+#
+
+GGUF_MAGIC             = 0x46554747  # "GGUF"
+GGUF_VERSION           = 3
+GGUF_DEFAULT_ALIGNMENT = 32
+
+#
+# metadata keys
+#
+
+
+class Keys:
+    class General:
+        ARCHITECTURE         = "general.architecture"
+        QUANTIZATION_VERSION = "general.quantization_version"
+        ALIGNMENT            = "general.alignment"
+        NAME                 = "general.name"
+        AUTHOR               = "general.author"
+        VERSION              = "general.version"
+        URL                  = "general.url"
+        DESCRIPTION          = "general.description"
+        LICENSE              = "general.license"
+        SOURCE_URL           = "general.source.url"
+        SOURCE_HF_REPO       = "general.source.huggingface.repository"
+        FILE_TYPE            = "general.file_type"
+
+    class LLM:
+        VOCAB_SIZE            = "{arch}.vocab_size"
+        CONTEXT_LENGTH        = "{arch}.context_length"
+        EMBEDDING_LENGTH      = "{arch}.embedding_length"
+        BLOCK_COUNT           = "{arch}.block_count"
+        FEED_FORWARD_LENGTH   = "{arch}.feed_forward_length"
+        USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+        TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
+        EXPERT_COUNT          = "{arch}.expert_count"
+        EXPERT_USED_COUNT     = "{arch}.expert_used_count"
+        POOLING_TYPE          = "{arch}.pooling_type"
+        LOGIT_SCALE           = "{arch}.logit_scale"
+
+    class Attention:
+        HEAD_COUNT        = "{arch}.attention.head_count"
+        HEAD_COUNT_KV     = "{arch}.attention.head_count_kv"
+        MAX_ALIBI_BIAS    = "{arch}.attention.max_alibi_bias"
+        CLAMP_KQV         = "{arch}.attention.clamp_kqv"
+        KEY_LENGTH        = "{arch}.attention.key_length"
+        VALUE_LENGTH      = "{arch}.attention.value_length"
+        LAYERNORM_EPS     = "{arch}.attention.layer_norm_epsilon"
+        LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
+        CAUSAL            = "{arch}.attention.causal"
+
+    class Rope:
+        DIMENSION_COUNT      = "{arch}.rope.dimension_count"
+        FREQ_BASE            = "{arch}.rope.freq_base"
+        SCALING_TYPE         = "{arch}.rope.scaling.type"
+        SCALING_FACTOR       = "{arch}.rope.scaling.factor"
+        SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
+        SCALING_FINETUNED    = "{arch}.rope.scaling.finetuned"
+
+    class SSM:
+        CONV_KERNEL    = "{arch}.ssm.conv_kernel"
+        INNER_SIZE     = "{arch}.ssm.inner_size"
+        STATE_SIZE     = "{arch}.ssm.state_size"
+        TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
+
+    class Tokenizer:
+        MODEL            = "tokenizer.ggml.model"
+        LIST             = "tokenizer.ggml.tokens"
+        TOKEN_TYPE       = "tokenizer.ggml.token_type"
+        TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count"  # for BERT-style token types
+        SCORES           = "tokenizer.ggml.scores"
+        MERGES           = "tokenizer.ggml.merges"
+        BOS_ID           = "tokenizer.ggml.bos_token_id"
+        EOS_ID           = "tokenizer.ggml.eos_token_id"
+        UNK_ID           = "tokenizer.ggml.unknown_token_id"
+        SEP_ID           = "tokenizer.ggml.seperator_token_id"
+        PAD_ID           = "tokenizer.ggml.padding_token_id"
+        CLS_ID           = "tokenizer.ggml.cls_token_id"
+        MASK_ID          = "tokenizer.ggml.mask_token_id"
+        ADD_BOS          = "tokenizer.ggml.add_bos_token"
+        ADD_EOS          = "tokenizer.ggml.add_eos_token"
+        ADD_PREFIX       = "tokenizer.ggml.add_space_prefix"
+        HF_JSON          = "tokenizer.huggingface.json"
+        RWKV             = "tokenizer.rwkv.world"
+        CHAT_TEMPLATE    = "tokenizer.chat_template"
+        CHAT_TEMPLATE_N  = "tokenizer.chat_template.{name}"
+        CHAT_TEMPLATES   = "tokenizer.chat_templates"
+        # FIM/Infill special tokens constants
+        PREFIX_ID        = "tokenizer.ggml.prefix_token_id"
+        SUFFIX_ID        = "tokenizer.ggml.suffix_token_id"
+        MIDDLE_ID        = "tokenizer.ggml.middle_token_id"
+        EOT_ID           = "tokenizer.ggml.eot_token_id"
+
+
+#
+# recommended mapping of model tensor names for storage in gguf
+#
+
+
+class MODEL_ARCH(IntEnum):
+    LLAMA      = auto()
+    FALCON     = auto()
+    BAICHUAN   = auto()
+    GROK       = auto()
+    GPT2       = auto()
+    GPTJ       = auto()
+    GPTNEOX    = auto()
+    MPT        = auto()
+    STARCODER  = auto()
+    PERSIMMON  = auto()
+    REFACT     = auto()
+    BERT       = auto()
+    NOMIC_BERT = auto()
+    BLOOM      = auto()
+    STABLELM   = auto()
+    QWEN       = auto()
+    QWEN2      = auto()
+    QWEN2MOE   = auto()
+    PHI2       = auto()
+    PLAMO      = auto()
+    CODESHELL  = auto()
+    ORION      = auto()
+    INTERNLM2  = auto()
+    MINICPM    = auto()
+    GEMMA      = auto()
+    STARCODER2 = auto()
+    MAMBA      = auto()
+    XVERSE     = auto()
+    COMMAND_R  = auto()
+    DBRX       = auto()
+    OLMO       = auto()
+
+
+class MODEL_TENSOR(IntEnum):
+    TOKEN_EMBD         = auto()
+    TOKEN_EMBD_NORM    = auto()
+    TOKEN_TYPES        = auto()
+    POS_EMBD           = auto()
+    OUTPUT             = auto()
+    OUTPUT_NORM        = auto()
+    ROPE_FREQS         = auto()
+    ATTN_Q             = auto()
+    ATTN_K             = auto()
+    ATTN_V             = auto()
+    ATTN_QKV           = auto()
+    ATTN_OUT           = auto()
+    ATTN_NORM          = auto()
+    ATTN_NORM_2        = auto()
+    ATTN_OUT_NORM      = auto()
+    ATTN_ROT_EMBD      = auto()
+    FFN_GATE_INP       = auto()
+    FFN_GATE_INP_SHEXP = auto()
+    FFN_NORM           = auto()
+    FFN_GATE           = auto()
+    FFN_DOWN           = auto()
+    FFN_UP             = auto()
+    FFN_ACT            = auto()
+    FFN_GATE_EXP       = auto()
+    FFN_DOWN_EXP       = auto()
+    FFN_UP_EXP         = auto()
+    FFN_GATE_SHEXP     = auto()
+    FFN_DOWN_SHEXP     = auto()
+    FFN_UP_SHEXP       = auto()
+    ATTN_Q_NORM        = auto()
+    ATTN_K_NORM        = auto()
+    LAYER_OUT_NORM     = auto()
+    SSM_IN             = auto()
+    SSM_CONV1D         = auto()
+    SSM_X              = auto()
+    SSM_DT             = auto()
+    SSM_A              = auto()
+    SSM_D              = auto()
+    SSM_OUT            = auto()
+
+
+MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
+    MODEL_ARCH.LLAMA:      "llama",
+    MODEL_ARCH.FALCON:     "falcon",
+    MODEL_ARCH.BAICHUAN:   "baichuan",
+    MODEL_ARCH.GROK:       "grok",
+    MODEL_ARCH.GPT2:       "gpt2",
+    MODEL_ARCH.GPTJ:       "gptj",
+    MODEL_ARCH.GPTNEOX:    "gptneox",
+    MODEL_ARCH.MPT:        "mpt",
+    MODEL_ARCH.STARCODER:  "starcoder",
+    MODEL_ARCH.PERSIMMON:  "persimmon",
+    MODEL_ARCH.REFACT:     "refact",
+    MODEL_ARCH.BERT:       "bert",
+    MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+    MODEL_ARCH.BLOOM:      "bloom",
+    MODEL_ARCH.STABLELM:   "stablelm",
+    MODEL_ARCH.QWEN:       "qwen",
+    MODEL_ARCH.QWEN2:      "qwen2",
+    MODEL_ARCH.QWEN2MOE:   "qwen2moe",
+    MODEL_ARCH.PHI2:       "phi2",
+    MODEL_ARCH.PLAMO:      "plamo",
+    MODEL_ARCH.CODESHELL:  "codeshell",
+    MODEL_ARCH.ORION:      "orion",
+    MODEL_ARCH.INTERNLM2:  "internlm2",
+    MODEL_ARCH.MINICPM:    "minicpm",
+    MODEL_ARCH.GEMMA:      "gemma",
+    MODEL_ARCH.STARCODER2: "starcoder2",
+    MODEL_ARCH.MAMBA:      "mamba",
+    MODEL_ARCH.XVERSE:     "xverse",
+    MODEL_ARCH.COMMAND_R:  "command-r",
+    MODEL_ARCH.DBRX:       "dbrx",
+    MODEL_ARCH.OLMO:       "olmo",
+}
+
+TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
+    MODEL_TENSOR.TOKEN_EMBD:         "token_embd",
+    MODEL_TENSOR.TOKEN_EMBD_NORM:    "token_embd_norm",
+    MODEL_TENSOR.TOKEN_TYPES:        "token_types",
+    MODEL_TENSOR.POS_EMBD:           "position_embd",
+    MODEL_TENSOR.OUTPUT_NORM:        "output_norm",
+    MODEL_TENSOR.OUTPUT:             "output",
+    MODEL_TENSOR.ROPE_FREQS:         "rope_freqs",
+    MODEL_TENSOR.ATTN_NORM:          "blk.{bid}.attn_norm",
+    MODEL_TENSOR.ATTN_NORM_2:        "blk.{bid}.attn_norm_2",
+    MODEL_TENSOR.ATTN_QKV:           "blk.{bid}.attn_qkv",
+    MODEL_TENSOR.ATTN_Q:             "blk.{bid}.attn_q",
+    MODEL_TENSOR.ATTN_K:             "blk.{bid}.attn_k",
+    MODEL_TENSOR.ATTN_V:             "blk.{bid}.attn_v",
+    MODEL_TENSOR.ATTN_OUT:           "blk.{bid}.attn_output",
+    MODEL_TENSOR.ATTN_ROT_EMBD:      "blk.{bid}.attn_rot_embd",
+    MODEL_TENSOR.ATTN_Q_NORM:        "blk.{bid}.attn_q_norm",
+    MODEL_TENSOR.ATTN_K_NORM:        "blk.{bid}.attn_k_norm",
+    MODEL_TENSOR.ATTN_OUT_NORM:      "blk.{bid}.attn_output_norm",
+    MODEL_TENSOR.FFN_GATE_INP:       "blk.{bid}.ffn_gate_inp",
+    MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
+    MODEL_TENSOR.FFN_NORM:           "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_GATE:           "blk.{bid}.ffn_gate",
+    MODEL_TENSOR.FFN_DOWN:           "blk.{bid}.ffn_down",
+    MODEL_TENSOR.FFN_UP:             "blk.{bid}.ffn_up",
+    MODEL_TENSOR.FFN_GATE_SHEXP:     "blk.{bid}.ffn_gate_shexp",
+    MODEL_TENSOR.FFN_DOWN_SHEXP:     "blk.{bid}.ffn_down_shexp",
+    MODEL_TENSOR.FFN_UP_SHEXP:       "blk.{bid}.ffn_up_shexp",
+    MODEL_TENSOR.FFN_ACT:            "blk.{bid}.ffn",
+    MODEL_TENSOR.FFN_GATE_EXP:       "blk.{bid}.ffn_gate_exps",
+    MODEL_TENSOR.FFN_DOWN_EXP:       "blk.{bid}.ffn_down_exps",
+    MODEL_TENSOR.FFN_UP_EXP:         "blk.{bid}.ffn_up_exps",
+    MODEL_TENSOR.LAYER_OUT_NORM:     "blk.{bid}.layer_output_norm",
+    MODEL_TENSOR.SSM_IN:             "blk.{bid}.ssm_in",
+    MODEL_TENSOR.SSM_CONV1D:         "blk.{bid}.ssm_conv1d",
+    MODEL_TENSOR.SSM_X:              "blk.{bid}.ssm_x",
+    MODEL_TENSOR.SSM_DT:             "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_A:              "blk.{bid}.ssm_a",
+    MODEL_TENSOR.SSM_D:              "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_OUT:            "blk.{bid}.ssm_out",
+}
+
+MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
+    MODEL_ARCH.LLAMA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.GROK: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.LAYER_OUT_NORM,
+    ],
+    MODEL_ARCH.GPTNEOX: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.FALCON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.BAICHUAN: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.STARCODER: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.BERT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.TOKEN_TYPES,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.LAYER_OUT_NORM,
+    ],
+    MODEL_ARCH.NOMIC_BERT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.TOKEN_TYPES,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.LAYER_OUT_NORM,
+    ],
+    MODEL_ARCH.MPT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_ACT,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.POS_EMBD,
+    ],
+    MODEL_ARCH.GPTJ: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.PERSIMMON: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.REFACT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.BLOOM: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.STABLELM: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+    ],
+    MODEL_ARCH.QWEN: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2MOE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_GATE_INP_SHEXP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
+    MODEL_ARCH.PLAMO: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.GPT2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.PHI2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.CODESHELL: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.ORION: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.INTERNLM2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.MINICPM: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.GEMMA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_NORM,
+    ],
+    MODEL_ARCH.STARCODER2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.MAMBA: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.SSM_IN,
+        MODEL_TENSOR.SSM_CONV1D,
+        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_A,
+        MODEL_TENSOR.SSM_D,
+        MODEL_TENSOR.SSM_OUT,
+    ],
+    MODEL_ARCH.XVERSE: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.COMMAND_R: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+    ],
+    MODEL_ARCH.DBRX: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_UP_EXP,
+    ],
+    MODEL_ARCH.OLMO: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    # TODO
+}
+
+# tensors that will not be serialized
+MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
+    MODEL_ARCH.LLAMA: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.BAICHUAN: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.PERSIMMON: [
+        MODEL_TENSOR.ROPE_FREQS,
+    ],
+    MODEL_ARCH.QWEN: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.CODESHELL: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.ORION: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.STARCODER2: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+    MODEL_ARCH.XVERSE: [
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+    ],
+}
+
+#
+# types
+#
+
+
+class TokenType(IntEnum):
+    NORMAL       = 1
+    UNKNOWN      = 2
+    CONTROL      = 3
+    USER_DEFINED = 4
+    UNUSED       = 5
+    BYTE         = 6
+
+
+class RopeScalingType(Enum):
+    NONE   = 'none'
+    LINEAR = 'linear'
+    YARN   = 'yarn'
+
+
+class PoolingType(IntEnum):
+    NONE = 0
+    MEAN = 1
+    CLS  = 2
+
+
+class GGMLQuantizationType(IntEnum):
+    F32     = 0
+    F16     = 1
+    Q4_0    = 2
+    Q4_1    = 3
+    Q5_0    = 6
+    Q5_1    = 7
+    Q8_0    = 8
+    Q8_1    = 9
+    Q2_K    = 10
+    Q3_K    = 11
+    Q4_K    = 12
+    Q5_K    = 13
+    Q6_K    = 14
+    Q8_K    = 15
+    IQ2_XXS = 16
+    IQ2_XS  = 17
+    IQ3_XXS = 18
+    IQ1_S   = 19
+    IQ4_NL  = 20
+    IQ3_S   = 21
+    IQ2_S   = 22
+    IQ4_XS  = 23
+    I8      = 24
+    I16     = 25
+    I32     = 26
+    I64     = 27
+    F64     = 28
+    IQ1_M   = 29
+
+
+class GGUFEndian(IntEnum):
+    LITTLE = 0
+    BIG    = 1
+
+
+class GGUFValueType(IntEnum):
+    UINT8   = 0
+    INT8    = 1
+    UINT16  = 2
+    INT16   = 3
+    UINT32  = 4
+    INT32   = 5
+    FLOAT32 = 6
+    BOOL    = 7
+    STRING  = 8
+    ARRAY   = 9
+    UINT64  = 10
+    INT64   = 11
+    FLOAT64 = 12
+
+    @staticmethod
+    def get_type(val: Any) -> GGUFValueType:
+        if isinstance(val, (str, bytes, bytearray)):
+            return GGUFValueType.STRING
+        elif isinstance(val, list):
+            return GGUFValueType.ARRAY
+        elif isinstance(val, float):
+            return GGUFValueType.FLOAT32
+        elif isinstance(val, bool):
+            return GGUFValueType.BOOL
+        elif isinstance(val, int):
+            return GGUFValueType.INT32
+        # TODO: need help with 64-bit types in Python
+        else:
+            print("Unknown type:", type(val))
+            sys.exit()
+
+
+# Note: Does not support GGML_QKK_64
+QK_K = 256
+# Items here are (block size, type size)
+GGML_QUANT_SIZES = {
+    GGMLQuantizationType.F32:     (1, 4),
+    GGMLQuantizationType.F16:     (1, 2),
+    GGMLQuantizationType.Q4_0:    (32, 2 + 16),
+    GGMLQuantizationType.Q4_1:    (32, 2 + 2 + 16),
+    GGMLQuantizationType.Q5_0:    (32, 2 + 4 + 16),
+    GGMLQuantizationType.Q5_1:    (32, 2 + 2 + 4 + 16),
+    GGMLQuantizationType.Q8_0:    (32, 2 + 32),
+    GGMLQuantizationType.Q8_1:    (32, 4 + 4 + 32),
+    GGMLQuantizationType.Q2_K:    (256, 2 + 2 + QK_K // 16 + QK_K // 4),
+    GGMLQuantizationType.Q3_K:    (256, 2 + QK_K // 4 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q4_K:    (256, 2 + 2 + QK_K // 2 + 12),
+    GGMLQuantizationType.Q5_K:    (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
+    GGMLQuantizationType.Q6_K:    (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
+    GGMLQuantizationType.Q8_K:    (256, 4 + QK_K + QK_K // 8),
+    GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
+    GGMLQuantizationType.IQ2_XS:  (256, 2 + QK_K // 4 + QK_K // 32),
+    GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
+    GGMLQuantizationType.IQ1_S:   (256, 2 + QK_K // 8 + QK_K // 16),
+    GGMLQuantizationType.IQ4_NL:  (32, 2 + 16),
+    GGMLQuantizationType.IQ3_S:   (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
+    GGMLQuantizationType.IQ2_S:   (256, 2 + QK_K // 4 + QK_K // 16),
+    GGMLQuantizationType.IQ4_XS:  (256, 2 + 2 + QK_K // 2 + QK_K // 64),
+    GGMLQuantizationType.I8:      (1, 1),
+    GGMLQuantizationType.I16:     (1, 2),
+    GGMLQuantizationType.I32:     (1, 4),
+    GGMLQuantizationType.I64:     (1, 8),
+    GGMLQuantizationType.F64:     (1, 8),
+    GGMLQuantizationType.IQ1_M:   (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
+}
+
+
+# Aliases for backward compatibility.
+
+# general
+KEY_GENERAL_ARCHITECTURE         = Keys.General.ARCHITECTURE
+KEY_GENERAL_QUANTIZATION_VERSION = Keys.General.QUANTIZATION_VERSION
+KEY_GENERAL_ALIGNMENT            = Keys.General.ALIGNMENT
+KEY_GENERAL_NAME                 = Keys.General.NAME
+KEY_GENERAL_AUTHOR               = Keys.General.AUTHOR
+KEY_GENERAL_URL                  = Keys.General.URL
+KEY_GENERAL_DESCRIPTION          = Keys.General.DESCRIPTION
+KEY_GENERAL_LICENSE              = Keys.General.LICENSE
+KEY_GENERAL_SOURCE_URL           = Keys.General.SOURCE_URL
+KEY_GENERAL_SOURCE_HF_REPO       = Keys.General.SOURCE_HF_REPO
+KEY_GENERAL_FILE_TYPE            = Keys.General.FILE_TYPE
+
+# LLM
+KEY_VOCAB_SIZE            = Keys.LLM.VOCAB_SIZE
+KEY_CONTEXT_LENGTH        = Keys.LLM.CONTEXT_LENGTH
+KEY_EMBEDDING_LENGTH      = Keys.LLM.EMBEDDING_LENGTH
+KEY_BLOCK_COUNT           = Keys.LLM.BLOCK_COUNT
+KEY_FEED_FORWARD_LENGTH   = Keys.LLM.FEED_FORWARD_LENGTH
+KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
+KEY_TENSOR_DATA_LAYOUT    = Keys.LLM.TENSOR_DATA_LAYOUT
+
+# attention
+KEY_ATTENTION_HEAD_COUNT        = Keys.Attention.HEAD_COUNT
+KEY_ATTENTION_HEAD_COUNT_KV     = Keys.Attention.HEAD_COUNT_KV
+KEY_ATTENTION_MAX_ALIBI_BIAS    = Keys.Attention.MAX_ALIBI_BIAS
+KEY_ATTENTION_CLAMP_KQV         = Keys.Attention.CLAMP_KQV
+KEY_ATTENTION_LAYERNORM_EPS     = Keys.Attention.LAYERNORM_EPS
+KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS
+
+# RoPE
+KEY_ROPE_DIMENSION_COUNT      = Keys.Rope.DIMENSION_COUNT
+KEY_ROPE_FREQ_BASE            = Keys.Rope.FREQ_BASE
+KEY_ROPE_SCALING_TYPE         = Keys.Rope.SCALING_TYPE
+KEY_ROPE_SCALING_FACTOR       = Keys.Rope.SCALING_FACTOR
+KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
+KEY_ROPE_SCALING_FINETUNED    = Keys.Rope.SCALING_FINETUNED
+
+# SSM
+KEY_SSM_CONV_KERNEL    = Keys.SSM.CONV_KERNEL
+KEY_SSM_INNER_SIZE     = Keys.SSM.INNER_SIZE
+KEY_SSM_STATE_SIZE     = Keys.SSM.STATE_SIZE
+KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
+
+# tokenization
+KEY_TOKENIZER_MODEL      = Keys.Tokenizer.MODEL
+KEY_TOKENIZER_LIST       = Keys.Tokenizer.LIST
+KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
+KEY_TOKENIZER_SCORES     = Keys.Tokenizer.SCORES
+KEY_TOKENIZER_MERGES     = Keys.Tokenizer.MERGES
+KEY_TOKENIZER_BOS_ID     = Keys.Tokenizer.BOS_ID
+KEY_TOKENIZER_EOS_ID     = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_UNK_ID     = Keys.Tokenizer.UNK_ID
+KEY_TOKENIZER_SEP_ID     = Keys.Tokenizer.SEP_ID
+KEY_TOKENIZER_PAD_ID     = Keys.Tokenizer.PAD_ID
+KEY_TOKENIZER_CLS_ID     = Keys.Tokenizer.CLS_ID
+KEY_TOKENIZER_MASK_ID    = Keys.Tokenizer.MASK_ID
+KEY_TOKENIZER_HF_JSON    = Keys.Tokenizer.HF_JSON
+KEY_TOKENIZER_RWKV       = Keys.Tokenizer.RWKV
+KEY_TOKENIZER_PRIFIX_ID  = Keys.Tokenizer.PREFIX_ID
+KEY_TOKENIZER_SUFFIX_ID  = Keys.Tokenizer.SUFFIX_ID
+KEY_TOKENIZER_MIDDLE_ID  = Keys.Tokenizer.MIDDLE_ID
+KEY_TOKENIZER_EOT_ID     = Keys.Tokenizer.EOT_ID
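
The (block size, type size) pairs in GGML_QUANT_SIZES above are enough to compute how many bytes a quantized tensor occupies on disk: elements are grouped into blocks, and each block serializes to a fixed size. A minimal sketch, again assuming the vendored package is importable as `gguf`:

# Minimal sketch (assumption: `gguf` on the import path); the helper
# name tensor_nbytes is illustrative, not part of the package.
from gguf.constants import GGMLQuantizationType, GGML_QUANT_SIZES

def tensor_nbytes(n_elements: int, qtype: GGMLQuantizationType) -> int:
    block_size, type_size = GGML_QUANT_SIZES[qtype]
    # Element counts must fall on block boundaries for block-quantized types.
    assert n_elements % block_size == 0
    return (n_elements // block_size) * type_size

# A 4096 x 4096 Q4_0 tensor: 32-element blocks of 2 + 16 = 18 bytes each.
print(tensor_nbytes(4096 * 4096, GGMLQuantizationType.Q4_0))  # 9437184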