bigdl-core-cpp 2.5.0b20240725__py3-none-win_amd64.whl → 2.5.0b20240727__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert-hf-to-gguf.py +1106 -320
- bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
- bigdl/cpp/gguf-py/gguf/constants.py +442 -173
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +472 -156
- bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +195 -23
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.5.0b20240727.dist-info/RECORD +61 -0
- bigdl_core_cpp-2.5.0b20240725.dist-info/RECORD +0 -61
- {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/lazy.py
CHANGED
@@ -3,10 +3,8 @@ from abc import ABC, ABCMeta, abstractmethod
|
|
3
3
|
|
4
4
|
import logging
|
5
5
|
from typing import Any, Callable
|
6
|
-
from collections import deque
|
7
6
|
|
8
7
|
import numpy as np
|
9
|
-
from numpy._typing import _Shape
|
10
8
|
from numpy.typing import DTypeLike
|
11
9
|
|
12
10
|
|
@@ -16,16 +14,16 @@ logger = logging.getLogger(__name__)
|
|
16
14
|
class LazyMeta(ABCMeta):
|
17
15
|
|
18
16
|
def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
|
19
|
-
def __getattr__(self,
|
20
|
-
meta_attr = getattr(self._meta,
|
17
|
+
def __getattr__(self, name: str) -> Any:
|
18
|
+
meta_attr = getattr(self._meta, name)
|
21
19
|
if callable(meta_attr):
|
22
20
|
return type(self)._wrap_fn(
|
23
|
-
(lambda s, *args, **kwargs: getattr(s,
|
21
|
+
(lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
|
24
22
|
use_self=self,
|
25
23
|
)
|
26
24
|
elif isinstance(meta_attr, self._tensor_type):
|
27
25
|
# e.g. self.T with torch.Tensor should still be wrapped
|
28
|
-
return type(self)._wrap_fn(lambda s: getattr(s,
|
26
|
+
return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
|
29
27
|
else:
|
30
28
|
# no need to wrap non-tensor properties,
|
31
29
|
# and they likely don't depend on the actual contents of the tensor
|
@@ -75,20 +73,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
75
73
|
_tensor_type: type
|
76
74
|
_meta: Any
|
77
75
|
_data: Any | None
|
78
|
-
_lazy: deque[LazyBase] # shared within a graph, to avoid deep recursion when making eager
|
79
76
|
_args: tuple
|
80
|
-
|
77
|
+
_kwargs: dict[str, Any]
|
78
|
+
_func: Callable[[Any], Any] | None
|
81
79
|
|
82
|
-
def __init__(self, *, meta: Any, data: Any | None = None,
|
80
|
+
def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
|
83
81
|
super().__init__()
|
84
82
|
self._meta = meta
|
85
83
|
self._data = data
|
86
|
-
self._lazy = lazy if lazy is not None else deque()
|
87
84
|
self._args = args
|
85
|
+
self._kwargs = kwargs if kwargs is not None else {}
|
88
86
|
self._func = func
|
89
87
|
assert self._func is not None or self._data is not None
|
90
|
-
if self._data is None:
|
91
|
-
self._lazy.append(self)
|
92
88
|
|
93
89
|
def __init_subclass__(cls) -> None:
|
94
90
|
if "_tensor_type" not in cls.__dict__:
|
@@ -118,6 +114,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
118
114
|
args = ((use_self,) if use_self is not None else ()) + args
|
119
115
|
|
120
116
|
meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
|
117
|
+
# TODO: maybe handle tensors in kwargs too
|
121
118
|
|
122
119
|
if isinstance(meta_noop, bool) and not meta_noop:
|
123
120
|
try:
|
@@ -141,21 +138,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
141
138
|
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
|
142
139
|
|
143
140
|
if isinstance(res, cls._tensor_type):
|
144
|
-
|
145
|
-
if collect_replace.shared_lazy is None:
|
146
|
-
collect_replace.shared_lazy = t._lazy
|
147
|
-
else:
|
148
|
-
collect_replace.shared_lazy.extend(t._lazy)
|
149
|
-
t._lazy = collect_replace.shared_lazy
|
150
|
-
|
151
|
-
# emulating a static variable
|
152
|
-
collect_replace.shared_lazy = None
|
153
|
-
|
154
|
-
LazyBase._recurse_apply(args, collect_replace)
|
155
|
-
|
156
|
-
shared_lazy = collect_replace.shared_lazy
|
157
|
-
|
158
|
-
return cls(meta=cls.eager_to_meta(res), lazy=shared_lazy, args=args, func=lambda a: fn(*a, **kwargs))
|
141
|
+
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
|
159
142
|
else:
|
160
143
|
del res # not needed
|
161
144
|
# non-tensor return likely relies on the contents of the args
|
@@ -167,25 +150,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
167
150
|
@classmethod
|
168
151
|
def to_eager(cls, t: Any) -> Any:
|
169
152
|
def simple_to_eager(_t: LazyBase) -> Any:
|
170
|
-
|
171
|
-
assert _t._data is not None
|
153
|
+
if _t._data is not None:
|
172
154
|
return _t._data
|
173
155
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
assert lt._func is not None
|
184
|
-
lt._args = cls._recurse_apply(lt._args, already_eager_to_eager)
|
185
|
-
lt._data = lt._func(lt._args)
|
186
|
-
# sanity check
|
187
|
-
assert lt._data.dtype == lt._meta.dtype
|
188
|
-
assert lt._data.shape == lt._meta.shape
|
156
|
+
# NOTE: there's a recursion limit in Python (usually 1000)
|
157
|
+
|
158
|
+
assert _t._func is not None
|
159
|
+
_t._args = cls._recurse_apply(_t._args, simple_to_eager)
|
160
|
+
_t._data = _t._func(*_t._args, **_t._kwargs)
|
161
|
+
# sanity check
|
162
|
+
assert _t._data is not None
|
163
|
+
assert _t._data.dtype == _t._meta.dtype
|
164
|
+
assert _t._data.shape == _t._meta.shape
|
189
165
|
|
190
166
|
return _t._data
|
191
167
|
|
@@ -204,7 +180,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
204
180
|
@classmethod
|
205
181
|
def from_eager(cls, t: Any) -> Any:
|
206
182
|
if type(t) is cls:
|
207
|
-
# already
|
183
|
+
# already lazy
|
208
184
|
return t
|
209
185
|
elif isinstance(t, cls._tensor_type):
|
210
186
|
return cls(meta=cls.eager_to_meta(t), data=t)
|
@@ -216,7 +192,7 @@ class LazyNumpyTensor(LazyBase):
|
|
216
192
|
_tensor_type = np.ndarray
|
217
193
|
|
218
194
|
@classmethod
|
219
|
-
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape:
|
195
|
+
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
|
220
196
|
# The initial idea was to use np.nan as the fill value,
|
221
197
|
# but non-float types like np.int16 can't use that.
|
222
198
|
# So zero it is.
|
@@ -226,11 +202,10 @@ class LazyNumpyTensor(LazyBase):
|
|
226
202
|
def astype(self, dtype, *args, **kwargs):
|
227
203
|
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
|
228
204
|
full_args = (self, dtype,) + args
|
229
|
-
|
230
|
-
return type(self)(meta=meta, args=full_args, lazy=self._lazy, func=(lambda a: a[0].astype(*a[1:], **kwargs)))
|
205
|
+
return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
|
231
206
|
|
232
207
|
def tofile(self, *args, **kwargs):
|
233
208
|
eager = LazyNumpyTensor.to_eager(self)
|
234
209
|
return eager.tofile(*args, **kwargs)
|
235
210
|
|
236
|
-
# TODO: __array_function__
|
211
|
+
# TODO: __array_function__
|
@@ -10,7 +10,7 @@ class TensorNameMap:
|
|
10
10
|
# Token embeddings
|
11
11
|
MODEL_TENSOR.TOKEN_EMBD: (
|
12
12
|
"gpt_neox.embed_in", # gptneox
|
13
|
-
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
|
13
|
+
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
|
14
14
|
"transformer.word_embeddings", # falcon
|
15
15
|
"word_embeddings", # bloom
|
16
16
|
"model.embed_tokens", # llama-hf
|
@@ -24,6 +24,9 @@ class TensorNameMap:
|
|
24
24
|
"backbone.embedding", # mamba
|
25
25
|
"backbone.embeddings", # mamba-hf
|
26
26
|
"transformer.in_out_embed", # Grok
|
27
|
+
"embedding.word_embeddings", # chatglm
|
28
|
+
"transformer.token_embeddings", # openelm
|
29
|
+
"shared", # t5
|
27
30
|
),
|
28
31
|
|
29
32
|
# Token type embeddings
|
@@ -36,6 +39,7 @@ class TensorNameMap:
|
|
36
39
|
"word_embeddings_layernorm", # bloom
|
37
40
|
"embeddings.LayerNorm", # bert
|
38
41
|
"emb_ln", # nomic-bert
|
42
|
+
"transformer.norm", # openelm
|
39
43
|
),
|
40
44
|
|
41
45
|
# Position embeddings
|
@@ -48,16 +52,17 @@ class TensorNameMap:
|
|
48
52
|
# Output
|
49
53
|
MODEL_TENSOR.OUTPUT: (
|
50
54
|
"embed_out", # gptneox
|
51
|
-
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
|
55
|
+
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
|
52
56
|
"output", # llama-pth bloom internlm2
|
53
57
|
"word_embeddings_for_head", # persimmon
|
54
58
|
"lm_head.linear", # phi2
|
59
|
+
"output_layer", # chatglm
|
55
60
|
),
|
56
61
|
|
57
62
|
# Output norm
|
58
63
|
MODEL_TENSOR.OUTPUT_NORM: (
|
59
64
|
"gpt_neox.final_layer_norm", # gptneox
|
60
|
-
"transformer.ln_f", # gpt2 gpt-j falcon
|
65
|
+
"transformer.ln_f", # gpt2 gpt-j falcon jais
|
61
66
|
"model.norm", # llama-hf baichuan internlm2
|
62
67
|
"norm", # llama-pth
|
63
68
|
"transformer.norm_f", # mpt dbrx
|
@@ -68,11 +73,14 @@ class TensorNameMap:
|
|
68
73
|
"model.norm_f", # mamba-qbert
|
69
74
|
"backbone.norm_f", # mamba
|
70
75
|
"transformer.rms_norm", # Grok
|
76
|
+
"encoder.final_layernorm", # chatglm
|
77
|
+
"transformer.norm", # openelm
|
71
78
|
),
|
72
79
|
|
73
80
|
# Rope frequencies
|
74
81
|
MODEL_TENSOR.ROPE_FREQS: (
|
75
82
|
"rope.freqs", # llama-pth
|
83
|
+
"rotary_pos_emb.inv_freq", # chatglm
|
76
84
|
),
|
77
85
|
}
|
78
86
|
|
@@ -80,7 +88,7 @@ class TensorNameMap:
|
|
80
88
|
# Attention norm
|
81
89
|
MODEL_TENSOR.ATTN_NORM: (
|
82
90
|
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
83
|
-
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
|
91
|
+
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
|
84
92
|
"transformer.blocks.{bid}.norm_1", # mpt
|
85
93
|
"transformer.h.{bid}.input_layernorm", # falcon7b
|
86
94
|
"h.{bid}.input_layernorm", # bloom
|
@@ -97,17 +105,20 @@ class TensorNameMap:
|
|
97
105
|
"backbone.layers.{bid}.norm", # mamba
|
98
106
|
"transformer.decoder_layer.{bid}.rms_norm", # Grok
|
99
107
|
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
|
108
|
+
"encoder.layers.{bid}.input_layernorm", # chatglm
|
109
|
+
"transformer.layers.{bid}.attn_norm", # openelm
|
100
110
|
),
|
101
111
|
|
102
112
|
# Attention norm 2
|
103
113
|
MODEL_TENSOR.ATTN_NORM_2: (
|
104
114
|
"transformer.h.{bid}.ln_attn", # falcon40b
|
115
|
+
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
105
116
|
),
|
106
117
|
|
107
118
|
# Attention query-key-value
|
108
119
|
MODEL_TENSOR.ATTN_QKV: (
|
109
120
|
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
110
|
-
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen
|
121
|
+
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
|
111
122
|
"transformer.blocks.{bid}.attn.Wqkv", # mpt
|
112
123
|
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
113
124
|
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
@@ -117,7 +128,9 @@ class TensorNameMap:
|
|
117
128
|
"h.{bid}.attn.c_attn", # gpt2
|
118
129
|
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
119
130
|
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
120
|
-
"model.layers.{bid}.self_attn.qkv_proj"
|
131
|
+
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
132
|
+
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
133
|
+
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
121
134
|
),
|
122
135
|
|
123
136
|
# Attention query
|
@@ -128,7 +141,7 @@ class TensorNameMap:
|
|
128
141
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
129
142
|
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
|
130
143
|
"model.layers.{bid}.attention.wq", # internlm2
|
131
|
-
"transformer.decoder_layer.{bid}.multi_head_attention.query"
|
144
|
+
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
|
132
145
|
),
|
133
146
|
|
134
147
|
# Attention key
|
@@ -140,7 +153,7 @@ class TensorNameMap:
|
|
140
153
|
"transformer.h.{bid}.attn.k", # refact
|
141
154
|
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
|
142
155
|
"model.layers.{bid}.attention.wk", # internlm2
|
143
|
-
"transformer.decoder_layer.{bid}.multi_head_attention.key"
|
156
|
+
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
|
144
157
|
),
|
145
158
|
|
146
159
|
# Attention value
|
@@ -158,7 +171,7 @@ class TensorNameMap:
|
|
158
171
|
# Attention output
|
159
172
|
MODEL_TENSOR.ATTN_OUT: (
|
160
173
|
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
161
|
-
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
|
174
|
+
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
|
162
175
|
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
163
176
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
164
177
|
"h.{bid}.self_attention.dense", # bloom
|
@@ -175,6 +188,8 @@ class TensorNameMap:
|
|
175
188
|
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
176
189
|
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
|
177
190
|
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
|
191
|
+
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
192
|
+
"transformer.layers.{bid}.attn.out_proj", # openelm
|
178
193
|
),
|
179
194
|
|
180
195
|
# Attention output norm
|
@@ -200,7 +215,7 @@ class TensorNameMap:
|
|
200
215
|
# Feed-forward norm
|
201
216
|
MODEL_TENSOR.FFN_NORM: (
|
202
217
|
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
203
|
-
"transformer.h.{bid}.ln_2", # gpt2 refact qwen
|
218
|
+
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
|
204
219
|
"h.{bid}.post_attention_layernorm", # bloom
|
205
220
|
"transformer.blocks.{bid}.norm_2", # mpt
|
206
221
|
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
@@ -210,6 +225,8 @@ class TensorNameMap:
|
|
210
225
|
"h.{bid}.ln_2", # gpt2
|
211
226
|
"model.layers.{bid}.ffn_norm", # internlm2
|
212
227
|
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
228
|
+
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
229
|
+
"transformer.layers.{bid}.ffn_norm", # openelm
|
213
230
|
),
|
214
231
|
|
215
232
|
# Post feed-forward norm
|
@@ -237,7 +254,7 @@ class TensorNameMap:
|
|
237
254
|
# Feed-forward up
|
238
255
|
MODEL_TENSOR.FFN_UP: (
|
239
256
|
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
240
|
-
"transformer.h.{bid}.mlp.c_fc", # gpt2
|
257
|
+
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais
|
241
258
|
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
242
259
|
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
243
260
|
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
@@ -259,6 +276,7 @@ class TensorNameMap:
|
|
259
276
|
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
260
277
|
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
261
278
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
279
|
+
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
262
280
|
),
|
263
281
|
|
264
282
|
MODEL_TENSOR.FFN_UP_EXP: (
|
@@ -270,6 +288,7 @@ class TensorNameMap:
|
|
270
288
|
|
271
289
|
MODEL_TENSOR.FFN_UP_SHEXP: (
|
272
290
|
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
291
|
+
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
|
273
292
|
),
|
274
293
|
|
275
294
|
# AWQ-activation gate
|
@@ -282,6 +301,7 @@ class TensorNameMap:
|
|
282
301
|
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
283
302
|
"layers.{bid}.feed_forward.w1", # llama-pth
|
284
303
|
"transformer.h.{bid}.mlp.w2", # qwen
|
304
|
+
"transformer.h.{bid}.mlp.c_fc2", # jais
|
285
305
|
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
286
306
|
"model.layers.{bid}.feed_forward.w1", # internlm2
|
287
307
|
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
@@ -299,12 +319,13 @@ class TensorNameMap:
|
|
299
319
|
|
300
320
|
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
301
321
|
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
322
|
+
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
|
302
323
|
),
|
303
324
|
|
304
325
|
# Feed-forward down
|
305
326
|
MODEL_TENSOR.FFN_DOWN: (
|
306
327
|
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
307
|
-
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
|
328
|
+
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
|
308
329
|
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
309
330
|
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
310
331
|
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
@@ -322,7 +343,10 @@ class TensorNameMap:
|
|
322
343
|
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
|
323
344
|
"model.layers.{bid}.mlp.c_proj", # starcoder2
|
324
345
|
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
|
346
|
+
"transformer.layers.{bid}.ffn.proj_2", # openelm
|
325
347
|
"model.layers.{bid}.residual_mlp.w2", # arctic
|
348
|
+
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
349
|
+
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
326
350
|
),
|
327
351
|
|
328
352
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
@@ -334,6 +358,7 @@ class TensorNameMap:
|
|
334
358
|
|
335
359
|
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
336
360
|
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
361
|
+
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
|
337
362
|
),
|
338
363
|
|
339
364
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
@@ -341,7 +366,8 @@ class TensorNameMap:
|
|
341
366
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
342
367
|
"model.layers.{bid}.self_attn.q_norm", # cohere
|
343
368
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
344
|
-
"encoder.layer.{bid}.attention.self.layer_norm_q"
|
369
|
+
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
370
|
+
"transformer.layers.{bid}.attn.q_norm", # openelm
|
345
371
|
),
|
346
372
|
|
347
373
|
MODEL_TENSOR.ATTN_K_NORM: (
|
@@ -349,7 +375,8 @@ class TensorNameMap:
|
|
349
375
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
350
376
|
"model.layers.{bid}.self_attn.k_norm", # cohere
|
351
377
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
352
|
-
"encoder.layer.{bid}.attention.self.layer_norm_k"
|
378
|
+
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
379
|
+
"transformer.layers.{bid}.attn.k_norm", # openelm
|
353
380
|
),
|
354
381
|
|
355
382
|
MODEL_TENSOR.ROPE_FREQS: (
|
@@ -361,6 +388,7 @@ class TensorNameMap:
|
|
361
388
|
"encoder.layers.{bid}.norm2", # nomic-bert
|
362
389
|
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
363
390
|
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
391
|
+
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
|
364
392
|
),
|
365
393
|
|
366
394
|
MODEL_TENSOR.SSM_IN: (
|
@@ -397,6 +425,152 @@ class TensorNameMap:
|
|
397
425
|
"model.layers.{bid}.out_proj",
|
398
426
|
"backbone.layers.{bid}.mixer.out_proj",
|
399
427
|
),
|
428
|
+
|
429
|
+
MODEL_TENSOR.ATTN_Q_A: (
|
430
|
+
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
|
431
|
+
),
|
432
|
+
|
433
|
+
MODEL_TENSOR.ATTN_Q_B: (
|
434
|
+
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
|
435
|
+
),
|
436
|
+
|
437
|
+
MODEL_TENSOR.ATTN_KV_A_MQA: (
|
438
|
+
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
|
439
|
+
),
|
440
|
+
|
441
|
+
MODEL_TENSOR.ATTN_KV_B: (
|
442
|
+
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
443
|
+
),
|
444
|
+
|
445
|
+
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
446
|
+
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
447
|
+
),
|
448
|
+
|
449
|
+
MODEL_TENSOR.ATTN_KV_A_NORM: (
|
450
|
+
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
|
451
|
+
),
|
452
|
+
|
453
|
+
MODEL_TENSOR.ATTN_SUB_NORM: (
|
454
|
+
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
|
455
|
+
),
|
456
|
+
|
457
|
+
MODEL_TENSOR.FFN_SUB_NORM: (
|
458
|
+
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
|
459
|
+
),
|
460
|
+
|
461
|
+
MODEL_TENSOR.DEC_ATTN_NORM: (
|
462
|
+
"decoder.block.{bid}.layer.0.layer_norm", # t5
|
463
|
+
),
|
464
|
+
|
465
|
+
MODEL_TENSOR.DEC_ATTN_Q: (
|
466
|
+
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
|
467
|
+
),
|
468
|
+
|
469
|
+
MODEL_TENSOR.DEC_ATTN_K: (
|
470
|
+
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
|
471
|
+
),
|
472
|
+
|
473
|
+
MODEL_TENSOR.DEC_ATTN_V: (
|
474
|
+
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
|
475
|
+
),
|
476
|
+
|
477
|
+
MODEL_TENSOR.DEC_ATTN_OUT: (
|
478
|
+
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
479
|
+
),
|
480
|
+
|
481
|
+
MODEL_TENSOR.DEC_ATTN_REL_B: (
|
482
|
+
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
483
|
+
),
|
484
|
+
|
485
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
|
486
|
+
"decoder.block.{bid}.layer.1.layer_norm", # t5
|
487
|
+
),
|
488
|
+
|
489
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
|
490
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
|
491
|
+
),
|
492
|
+
|
493
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_K: (
|
494
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
|
495
|
+
),
|
496
|
+
|
497
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_V: (
|
498
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
|
499
|
+
),
|
500
|
+
|
501
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
|
502
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
|
503
|
+
),
|
504
|
+
|
505
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
|
506
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
|
507
|
+
),
|
508
|
+
|
509
|
+
MODEL_TENSOR.DEC_FFN_NORM: (
|
510
|
+
"decoder.block.{bid}.layer.2.layer_norm", # t5
|
511
|
+
),
|
512
|
+
|
513
|
+
MODEL_TENSOR.DEC_FFN_GATE: (
|
514
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
|
515
|
+
),
|
516
|
+
|
517
|
+
MODEL_TENSOR.DEC_FFN_UP: (
|
518
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
|
519
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
|
520
|
+
),
|
521
|
+
|
522
|
+
MODEL_TENSOR.DEC_FFN_DOWN: (
|
523
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
|
524
|
+
),
|
525
|
+
|
526
|
+
MODEL_TENSOR.DEC_OUTPUT_NORM: (
|
527
|
+
"decoder.final_layer_norm", # t5
|
528
|
+
),
|
529
|
+
|
530
|
+
MODEL_TENSOR.ENC_ATTN_NORM: (
|
531
|
+
"encoder.block.{bid}.layer.0.layer_norm", # t5
|
532
|
+
),
|
533
|
+
|
534
|
+
MODEL_TENSOR.ENC_ATTN_Q: (
|
535
|
+
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
|
536
|
+
),
|
537
|
+
|
538
|
+
MODEL_TENSOR.ENC_ATTN_K: (
|
539
|
+
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
|
540
|
+
),
|
541
|
+
|
542
|
+
MODEL_TENSOR.ENC_ATTN_V: (
|
543
|
+
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
|
544
|
+
),
|
545
|
+
|
546
|
+
MODEL_TENSOR.ENC_ATTN_OUT: (
|
547
|
+
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
548
|
+
),
|
549
|
+
|
550
|
+
MODEL_TENSOR.ENC_ATTN_REL_B: (
|
551
|
+
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
552
|
+
),
|
553
|
+
|
554
|
+
MODEL_TENSOR.ENC_FFN_NORM: (
|
555
|
+
"encoder.block.{bid}.layer.1.layer_norm", # t5
|
556
|
+
),
|
557
|
+
|
558
|
+
MODEL_TENSOR.ENC_FFN_GATE: (
|
559
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
|
560
|
+
),
|
561
|
+
|
562
|
+
MODEL_TENSOR.ENC_FFN_UP: (
|
563
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
|
564
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
|
565
|
+
),
|
566
|
+
|
567
|
+
MODEL_TENSOR.ENC_FFN_DOWN: (
|
568
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
|
569
|
+
),
|
570
|
+
|
571
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
572
|
+
"encoder.final_layer_norm", # t5
|
573
|
+
),
|
400
574
|
}
|
401
575
|
|
402
576
|
# architecture-specific block mappings
|
@@ -428,14 +602,12 @@ class TensorNameMap:
|
|
428
602
|
for tensor, keys in self.block_mappings_cfg.items():
|
429
603
|
if tensor not in MODEL_TENSORS[arch]:
|
430
604
|
continue
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
key = key.format(bid = bid, xid = xid)
|
438
|
-
self.mapping[key] = (tensor, tensor_name)
|
605
|
+
|
606
|
+
tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
|
607
|
+
self.mapping[tensor_name] = (tensor, tensor_name)
|
608
|
+
for key in keys:
|
609
|
+
key = key.format(bid = bid)
|
610
|
+
self.mapping[key] = (tensor, tensor_name)
|
439
611
|
|
440
612
|
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
|
441
613
|
result = self.mapping.get(key)
|
@@ -474,4 +646,4 @@ class TensorNameMap:
|
|
474
646
|
|
475
647
|
|
476
648
|
def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
|
477
|
-
return TensorNameMap(arch, n_blocks)
|
649
|
+
return TensorNameMap(arch, n_blocks)
|
bigdl/cpp/libs/baby-llama.exe
CHANGED
Binary file
|
bigdl/cpp/libs/batched-bench.exe
CHANGED
Binary file
|
bigdl/cpp/libs/batched.exe
CHANGED
Binary file
|
bigdl/cpp/libs/beam-search.exe
CHANGED
Binary file
|
bigdl/cpp/libs/benchmark.exe
CHANGED
Binary file
|
bigdl/cpp/libs/common.lib
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/embedding.exe
CHANGED
Binary file
|
bigdl/cpp/libs/export-lora.exe
CHANGED
Binary file
|
bigdl/cpp/libs/finetune.exe
CHANGED
Binary file
|
bigdl/cpp/libs/ggml_shared.dll
CHANGED
Binary file
|
bigdl/cpp/libs/gguf.exe
CHANGED
Binary file
|
bigdl/cpp/libs/gritlm.exe
CHANGED
Binary file
|
bigdl/cpp/libs/imatrix.exe
CHANGED
Binary file
|
bigdl/cpp/libs/infill.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
|
bigdl/cpp/libs/llava-cli.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
|
bigdl/cpp/libs/lookahead.exe
CHANGED
Binary file
|
bigdl/cpp/libs/lookup.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/main.exe
CHANGED
Binary file
|
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
|
bigdl/cpp/libs/parallel.exe
CHANGED
Binary file
|
bigdl/cpp/libs/passkey.exe
CHANGED
Binary file
|
bigdl/cpp/libs/perplexity.exe
CHANGED
Binary file
|
bigdl/cpp/libs/q8dot.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/quantize.exe
CHANGED
Binary file
|