bigdl-core-cpp 2.5.0b20240724__py3-none-win_amd64.whl → 2.5.0b20240726__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert-hf-to-gguf.py +1148 -315
- bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
- bigdl/cpp/gguf-py/gguf/constants.py +463 -167
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +475 -156
- bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +209 -23
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.5.0b20240726.dist-info/RECORD +61 -0
- bigdl_core_cpp-2.5.0b20240724.dist-info/RECORD +0 -61
- {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/lazy.py
CHANGED
@@ -3,10 +3,8 @@ from abc import ABC, ABCMeta, abstractmethod
|
|
3
3
|
|
4
4
|
import logging
|
5
5
|
from typing import Any, Callable
|
6
|
-
from collections import deque
|
7
6
|
|
8
7
|
import numpy as np
|
9
|
-
from numpy._typing import _Shape
|
10
8
|
from numpy.typing import DTypeLike
|
11
9
|
|
12
10
|
|
@@ -16,16 +14,16 @@ logger = logging.getLogger(__name__)
|
|
16
14
|
class LazyMeta(ABCMeta):
|
17
15
|
|
18
16
|
def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
|
19
|
-
def __getattr__(self,
|
20
|
-
meta_attr = getattr(self._meta,
|
17
|
+
def __getattr__(self, name: str) -> Any:
|
18
|
+
meta_attr = getattr(self._meta, name)
|
21
19
|
if callable(meta_attr):
|
22
20
|
return type(self)._wrap_fn(
|
23
|
-
(lambda s, *args, **kwargs: getattr(s,
|
21
|
+
(lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
|
24
22
|
use_self=self,
|
25
23
|
)
|
26
24
|
elif isinstance(meta_attr, self._tensor_type):
|
27
25
|
# e.g. self.T with torch.Tensor should still be wrapped
|
28
|
-
return type(self)._wrap_fn(lambda s: getattr(s,
|
26
|
+
return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
|
29
27
|
else:
|
30
28
|
# no need to wrap non-tensor properties,
|
31
29
|
# and they likely don't depend on the actual contents of the tensor
|
@@ -75,20 +73,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
75
73
|
_tensor_type: type
|
76
74
|
_meta: Any
|
77
75
|
_data: Any | None
|
78
|
-
_lazy: deque[LazyBase] # shared within a graph, to avoid deep recursion when making eager
|
79
76
|
_args: tuple
|
80
|
-
|
77
|
+
_kwargs: dict[str, Any]
|
78
|
+
_func: Callable[[Any], Any] | None
|
81
79
|
|
82
|
-
def __init__(self, *, meta: Any, data: Any | None = None,
|
80
|
+
def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
|
83
81
|
super().__init__()
|
84
82
|
self._meta = meta
|
85
83
|
self._data = data
|
86
|
-
self._lazy = lazy if lazy is not None else deque()
|
87
84
|
self._args = args
|
85
|
+
self._kwargs = kwargs if kwargs is not None else {}
|
88
86
|
self._func = func
|
89
87
|
assert self._func is not None or self._data is not None
|
90
|
-
if self._data is None:
|
91
|
-
self._lazy.append(self)
|
92
88
|
|
93
89
|
def __init_subclass__(cls) -> None:
|
94
90
|
if "_tensor_type" not in cls.__dict__:
|
@@ -118,6 +114,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
118
114
|
args = ((use_self,) if use_self is not None else ()) + args
|
119
115
|
|
120
116
|
meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
|
117
|
+
# TODO: maybe handle tensors in kwargs too
|
121
118
|
|
122
119
|
if isinstance(meta_noop, bool) and not meta_noop:
|
123
120
|
try:
|
@@ -141,21 +138,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
141
138
|
res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
|
142
139
|
|
143
140
|
if isinstance(res, cls._tensor_type):
|
144
|
-
|
145
|
-
if collect_replace.shared_lazy is None:
|
146
|
-
collect_replace.shared_lazy = t._lazy
|
147
|
-
else:
|
148
|
-
collect_replace.shared_lazy.extend(t._lazy)
|
149
|
-
t._lazy = collect_replace.shared_lazy
|
150
|
-
|
151
|
-
# emulating a static variable
|
152
|
-
collect_replace.shared_lazy = None
|
153
|
-
|
154
|
-
LazyBase._recurse_apply(args, collect_replace)
|
155
|
-
|
156
|
-
shared_lazy = collect_replace.shared_lazy
|
157
|
-
|
158
|
-
return cls(meta=cls.eager_to_meta(res), lazy=shared_lazy, args=args, func=lambda a: fn(*a, **kwargs))
|
141
|
+
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
|
159
142
|
else:
|
160
143
|
del res # not needed
|
161
144
|
# non-tensor return likely relies on the contents of the args
|
@@ -167,25 +150,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
167
150
|
@classmethod
|
168
151
|
def to_eager(cls, t: Any) -> Any:
|
169
152
|
def simple_to_eager(_t: LazyBase) -> Any:
|
170
|
-
|
171
|
-
assert _t._data is not None
|
153
|
+
if _t._data is not None:
|
172
154
|
return _t._data
|
173
155
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
assert lt._func is not None
|
184
|
-
lt._args = cls._recurse_apply(lt._args, already_eager_to_eager)
|
185
|
-
lt._data = lt._func(lt._args)
|
186
|
-
# sanity check
|
187
|
-
assert lt._data.dtype == lt._meta.dtype
|
188
|
-
assert lt._data.shape == lt._meta.shape
|
156
|
+
# NOTE: there's a recursion limit in Python (usually 1000)
|
157
|
+
|
158
|
+
assert _t._func is not None
|
159
|
+
_t._args = cls._recurse_apply(_t._args, simple_to_eager)
|
160
|
+
_t._data = _t._func(*_t._args, **_t._kwargs)
|
161
|
+
# sanity check
|
162
|
+
assert _t._data is not None
|
163
|
+
assert _t._data.dtype == _t._meta.dtype
|
164
|
+
assert _t._data.shape == _t._meta.shape
|
189
165
|
|
190
166
|
return _t._data
|
191
167
|
|
@@ -204,7 +180,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|
204
180
|
@classmethod
|
205
181
|
def from_eager(cls, t: Any) -> Any:
|
206
182
|
if type(t) is cls:
|
207
|
-
# already
|
183
|
+
# already lazy
|
208
184
|
return t
|
209
185
|
elif isinstance(t, cls._tensor_type):
|
210
186
|
return cls(meta=cls.eager_to_meta(t), data=t)
|
@@ -216,7 +192,7 @@ class LazyNumpyTensor(LazyBase):
|
|
216
192
|
_tensor_type = np.ndarray
|
217
193
|
|
218
194
|
@classmethod
|
219
|
-
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape:
|
195
|
+
def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
|
220
196
|
# The initial idea was to use np.nan as the fill value,
|
221
197
|
# but non-float types like np.int16 can't use that.
|
222
198
|
# So zero it is.
|
@@ -226,11 +202,10 @@ class LazyNumpyTensor(LazyBase):
|
|
226
202
|
def astype(self, dtype, *args, **kwargs):
|
227
203
|
meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
|
228
204
|
full_args = (self, dtype,) + args
|
229
|
-
|
230
|
-
return type(self)(meta=meta, args=full_args, lazy=self._lazy, func=(lambda a: a[0].astype(*a[1:], **kwargs)))
|
205
|
+
return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
|
231
206
|
|
232
207
|
def tofile(self, *args, **kwargs):
|
233
208
|
eager = LazyNumpyTensor.to_eager(self)
|
234
209
|
return eager.tofile(*args, **kwargs)
|
235
210
|
|
236
|
-
# TODO: __array_function__
|
211
|
+
# TODO: __array_function__
|
@@ -10,7 +10,7 @@ class TensorNameMap:
|
|
10
10
|
# Token embeddings
|
11
11
|
MODEL_TENSOR.TOKEN_EMBD: (
|
12
12
|
"gpt_neox.embed_in", # gptneox
|
13
|
-
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
|
13
|
+
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
|
14
14
|
"transformer.word_embeddings", # falcon
|
15
15
|
"word_embeddings", # bloom
|
16
16
|
"model.embed_tokens", # llama-hf
|
@@ -24,6 +24,9 @@ class TensorNameMap:
|
|
24
24
|
"backbone.embedding", # mamba
|
25
25
|
"backbone.embeddings", # mamba-hf
|
26
26
|
"transformer.in_out_embed", # Grok
|
27
|
+
"embedding.word_embeddings", # chatglm
|
28
|
+
"transformer.token_embeddings", # openelm
|
29
|
+
"shared", # t5
|
27
30
|
),
|
28
31
|
|
29
32
|
# Token type embeddings
|
@@ -36,6 +39,7 @@ class TensorNameMap:
|
|
36
39
|
"word_embeddings_layernorm", # bloom
|
37
40
|
"embeddings.LayerNorm", # bert
|
38
41
|
"emb_ln", # nomic-bert
|
42
|
+
"transformer.norm", # openelm
|
39
43
|
),
|
40
44
|
|
41
45
|
# Position embeddings
|
@@ -48,16 +52,17 @@ class TensorNameMap:
|
|
48
52
|
# Output
|
49
53
|
MODEL_TENSOR.OUTPUT: (
|
50
54
|
"embed_out", # gptneox
|
51
|
-
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
|
55
|
+
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
|
52
56
|
"output", # llama-pth bloom internlm2
|
53
57
|
"word_embeddings_for_head", # persimmon
|
54
58
|
"lm_head.linear", # phi2
|
59
|
+
"output_layer", # chatglm
|
55
60
|
),
|
56
61
|
|
57
62
|
# Output norm
|
58
63
|
MODEL_TENSOR.OUTPUT_NORM: (
|
59
64
|
"gpt_neox.final_layer_norm", # gptneox
|
60
|
-
"transformer.ln_f", # gpt2 gpt-j falcon
|
65
|
+
"transformer.ln_f", # gpt2 gpt-j falcon jais
|
61
66
|
"model.norm", # llama-hf baichuan internlm2
|
62
67
|
"norm", # llama-pth
|
63
68
|
"transformer.norm_f", # mpt dbrx
|
@@ -68,11 +73,14 @@ class TensorNameMap:
|
|
68
73
|
"model.norm_f", # mamba-qbert
|
69
74
|
"backbone.norm_f", # mamba
|
70
75
|
"transformer.rms_norm", # Grok
|
76
|
+
"encoder.final_layernorm", # chatglm
|
77
|
+
"transformer.norm", # openelm
|
71
78
|
),
|
72
79
|
|
73
80
|
# Rope frequencies
|
74
81
|
MODEL_TENSOR.ROPE_FREQS: (
|
75
82
|
"rope.freqs", # llama-pth
|
83
|
+
"rotary_pos_emb.inv_freq", # chatglm
|
76
84
|
),
|
77
85
|
}
|
78
86
|
|
@@ -80,7 +88,7 @@ class TensorNameMap:
|
|
80
88
|
# Attention norm
|
81
89
|
MODEL_TENSOR.ATTN_NORM: (
|
82
90
|
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
83
|
-
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
|
91
|
+
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
|
84
92
|
"transformer.blocks.{bid}.norm_1", # mpt
|
85
93
|
"transformer.h.{bid}.input_layernorm", # falcon7b
|
86
94
|
"h.{bid}.input_layernorm", # bloom
|
@@ -97,17 +105,20 @@ class TensorNameMap:
|
|
97
105
|
"backbone.layers.{bid}.norm", # mamba
|
98
106
|
"transformer.decoder_layer.{bid}.rms_norm", # Grok
|
99
107
|
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
|
108
|
+
"encoder.layers.{bid}.input_layernorm", # chatglm
|
109
|
+
"transformer.layers.{bid}.attn_norm", # openelm
|
100
110
|
),
|
101
111
|
|
102
112
|
# Attention norm 2
|
103
113
|
MODEL_TENSOR.ATTN_NORM_2: (
|
104
114
|
"transformer.h.{bid}.ln_attn", # falcon40b
|
115
|
+
"encoder.layer.{bid}.layer_norm_1", # jina-v2-code
|
105
116
|
),
|
106
117
|
|
107
118
|
# Attention query-key-value
|
108
119
|
MODEL_TENSOR.ATTN_QKV: (
|
109
120
|
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
|
110
|
-
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen
|
121
|
+
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
|
111
122
|
"transformer.blocks.{bid}.attn.Wqkv", # mpt
|
112
123
|
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
|
113
124
|
"transformer.h.{bid}.self_attention.query_key_value", # falcon
|
@@ -117,7 +128,9 @@ class TensorNameMap:
|
|
117
128
|
"h.{bid}.attn.c_attn", # gpt2
|
118
129
|
"transformer.h.{bid}.mixer.Wqkv", # phi2
|
119
130
|
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
|
120
|
-
"model.layers.{bid}.self_attn.qkv_proj"
|
131
|
+
"model.layers.{bid}.self_attn.qkv_proj", # phi3
|
132
|
+
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
|
133
|
+
"transformer.layers.{bid}.attn.qkv_proj", # openelm
|
121
134
|
),
|
122
135
|
|
123
136
|
# Attention query
|
@@ -128,7 +141,7 @@ class TensorNameMap:
|
|
128
141
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
129
142
|
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
|
130
143
|
"model.layers.{bid}.attention.wq", # internlm2
|
131
|
-
"transformer.decoder_layer.{bid}.multi_head_attention.query"
|
144
|
+
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
|
132
145
|
),
|
133
146
|
|
134
147
|
# Attention key
|
@@ -140,7 +153,7 @@ class TensorNameMap:
|
|
140
153
|
"transformer.h.{bid}.attn.k", # refact
|
141
154
|
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
|
142
155
|
"model.layers.{bid}.attention.wk", # internlm2
|
143
|
-
"transformer.decoder_layer.{bid}.multi_head_attention.key"
|
156
|
+
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
|
144
157
|
),
|
145
158
|
|
146
159
|
# Attention value
|
@@ -158,7 +171,7 @@ class TensorNameMap:
|
|
158
171
|
# Attention output
|
159
172
|
MODEL_TENSOR.ATTN_OUT: (
|
160
173
|
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
161
|
-
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
|
174
|
+
"transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
|
162
175
|
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
163
176
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
164
177
|
"h.{bid}.self_attention.dense", # bloom
|
@@ -175,6 +188,8 @@ class TensorNameMap:
|
|
175
188
|
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
|
176
189
|
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
|
177
190
|
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
|
191
|
+
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
192
|
+
"transformer.layers.{bid}.attn.out_proj", # openelm
|
178
193
|
),
|
179
194
|
|
180
195
|
# Attention output norm
|
@@ -185,6 +200,10 @@ class TensorNameMap:
|
|
185
200
|
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
|
186
201
|
),
|
187
202
|
|
203
|
+
MODEL_TENSOR.ATTN_POST_NORM: (
|
204
|
+
"model.layers.{bid}.post_attention_layernorm", # gemma2
|
205
|
+
),
|
206
|
+
|
188
207
|
# Rotary embeddings
|
189
208
|
MODEL_TENSOR.ATTN_ROT_EMBD: (
|
190
209
|
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
|
@@ -196,7 +215,7 @@ class TensorNameMap:
|
|
196
215
|
# Feed-forward norm
|
197
216
|
MODEL_TENSOR.FFN_NORM: (
|
198
217
|
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
199
|
-
"transformer.h.{bid}.ln_2", # gpt2 refact qwen
|
218
|
+
"transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
|
200
219
|
"h.{bid}.post_attention_layernorm", # bloom
|
201
220
|
"transformer.blocks.{bid}.norm_2", # mpt
|
202
221
|
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
@@ -206,6 +225,18 @@ class TensorNameMap:
|
|
206
225
|
"h.{bid}.ln_2", # gpt2
|
207
226
|
"model.layers.{bid}.ffn_norm", # internlm2
|
208
227
|
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
228
|
+
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
229
|
+
"transformer.layers.{bid}.ffn_norm", # openelm
|
230
|
+
),
|
231
|
+
|
232
|
+
# Pre feed-forward norm
|
233
|
+
MODEL_TENSOR.FFN_PRE_NORM: (
|
234
|
+
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
|
235
|
+
),
|
236
|
+
|
237
|
+
# Post feed-forward norm
|
238
|
+
MODEL_TENSOR.FFN_POST_NORM: (
|
239
|
+
"model.layers.{bid}.post_feedforward_layernorm", # gemma2
|
209
240
|
),
|
210
241
|
|
211
242
|
MODEL_TENSOR.FFN_GATE_INP: (
|
@@ -223,7 +254,7 @@ class TensorNameMap:
|
|
223
254
|
# Feed-forward up
|
224
255
|
MODEL_TENSOR.FFN_UP: (
|
225
256
|
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
|
226
|
-
"transformer.h.{bid}.mlp.c_fc", # gpt2
|
257
|
+
"transformer.h.{bid}.mlp.c_fc", # gpt2 jais
|
227
258
|
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
228
259
|
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
229
260
|
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
@@ -245,6 +276,7 @@ class TensorNameMap:
|
|
245
276
|
"model.layers.{bid}.mlp.c_fc", # starcoder2
|
246
277
|
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
|
247
278
|
"model.layers.{bid}.residual_mlp.w3", # arctic
|
279
|
+
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
248
280
|
),
|
249
281
|
|
250
282
|
MODEL_TENSOR.FFN_UP_EXP: (
|
@@ -256,6 +288,7 @@ class TensorNameMap:
|
|
256
288
|
|
257
289
|
MODEL_TENSOR.FFN_UP_SHEXP: (
|
258
290
|
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
291
|
+
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
|
259
292
|
),
|
260
293
|
|
261
294
|
# AWQ-activation gate
|
@@ -268,6 +301,7 @@ class TensorNameMap:
|
|
268
301
|
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
269
302
|
"layers.{bid}.feed_forward.w1", # llama-pth
|
270
303
|
"transformer.h.{bid}.mlp.w2", # qwen
|
304
|
+
"transformer.h.{bid}.mlp.c_fc2", # jais
|
271
305
|
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
|
272
306
|
"model.layers.{bid}.feed_forward.w1", # internlm2
|
273
307
|
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
|
@@ -285,12 +319,13 @@ class TensorNameMap:
|
|
285
319
|
|
286
320
|
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
287
321
|
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
322
|
+
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
|
288
323
|
),
|
289
324
|
|
290
325
|
# Feed-forward down
|
291
326
|
MODEL_TENSOR.FFN_DOWN: (
|
292
327
|
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
293
|
-
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
|
328
|
+
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
|
294
329
|
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
295
330
|
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
296
331
|
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
@@ -308,7 +343,10 @@ class TensorNameMap:
|
|
308
343
|
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
|
309
344
|
"model.layers.{bid}.mlp.c_proj", # starcoder2
|
310
345
|
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
|
346
|
+
"transformer.layers.{bid}.ffn.proj_2", # openelm
|
311
347
|
"model.layers.{bid}.residual_mlp.w2", # arctic
|
348
|
+
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
349
|
+
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
312
350
|
),
|
313
351
|
|
314
352
|
MODEL_TENSOR.FFN_DOWN_EXP: (
|
@@ -320,6 +358,7 @@ class TensorNameMap:
|
|
320
358
|
|
321
359
|
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
322
360
|
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
361
|
+
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
|
323
362
|
),
|
324
363
|
|
325
364
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
@@ -327,7 +366,8 @@ class TensorNameMap:
|
|
327
366
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
328
367
|
"model.layers.{bid}.self_attn.q_norm", # cohere
|
329
368
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
330
|
-
"encoder.layer.{bid}.attention.self.layer_norm_q"
|
369
|
+
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
370
|
+
"transformer.layers.{bid}.attn.q_norm", # openelm
|
331
371
|
),
|
332
372
|
|
333
373
|
MODEL_TENSOR.ATTN_K_NORM: (
|
@@ -335,7 +375,8 @@ class TensorNameMap:
|
|
335
375
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
336
376
|
"model.layers.{bid}.self_attn.k_norm", # cohere
|
337
377
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
338
|
-
"encoder.layer.{bid}.attention.self.layer_norm_k"
|
378
|
+
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
379
|
+
"transformer.layers.{bid}.attn.k_norm", # openelm
|
339
380
|
),
|
340
381
|
|
341
382
|
MODEL_TENSOR.ROPE_FREQS: (
|
@@ -347,6 +388,7 @@ class TensorNameMap:
|
|
347
388
|
"encoder.layers.{bid}.norm2", # nomic-bert
|
348
389
|
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
349
390
|
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
391
|
+
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
|
350
392
|
),
|
351
393
|
|
352
394
|
MODEL_TENSOR.SSM_IN: (
|
@@ -383,6 +425,152 @@ class TensorNameMap:
|
|
383
425
|
"model.layers.{bid}.out_proj",
|
384
426
|
"backbone.layers.{bid}.mixer.out_proj",
|
385
427
|
),
|
428
|
+
|
429
|
+
MODEL_TENSOR.ATTN_Q_A: (
|
430
|
+
"model.layers.{bid}.self_attn.q_a_proj", # deepseek2
|
431
|
+
),
|
432
|
+
|
433
|
+
MODEL_TENSOR.ATTN_Q_B: (
|
434
|
+
"model.layers.{bid}.self_attn.q_b_proj", # deepseek2
|
435
|
+
),
|
436
|
+
|
437
|
+
MODEL_TENSOR.ATTN_KV_A_MQA: (
|
438
|
+
"model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
|
439
|
+
),
|
440
|
+
|
441
|
+
MODEL_TENSOR.ATTN_KV_B: (
|
442
|
+
"model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
|
443
|
+
),
|
444
|
+
|
445
|
+
MODEL_TENSOR.ATTN_Q_A_NORM: (
|
446
|
+
"model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
|
447
|
+
),
|
448
|
+
|
449
|
+
MODEL_TENSOR.ATTN_KV_A_NORM: (
|
450
|
+
"model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
|
451
|
+
),
|
452
|
+
|
453
|
+
MODEL_TENSOR.ATTN_SUB_NORM: (
|
454
|
+
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
|
455
|
+
),
|
456
|
+
|
457
|
+
MODEL_TENSOR.FFN_SUB_NORM: (
|
458
|
+
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
|
459
|
+
),
|
460
|
+
|
461
|
+
MODEL_TENSOR.DEC_ATTN_NORM: (
|
462
|
+
"decoder.block.{bid}.layer.0.layer_norm", # t5
|
463
|
+
),
|
464
|
+
|
465
|
+
MODEL_TENSOR.DEC_ATTN_Q: (
|
466
|
+
"decoder.block.{bid}.layer.0.SelfAttention.q", # t5
|
467
|
+
),
|
468
|
+
|
469
|
+
MODEL_TENSOR.DEC_ATTN_K: (
|
470
|
+
"decoder.block.{bid}.layer.0.SelfAttention.k", # t5
|
471
|
+
),
|
472
|
+
|
473
|
+
MODEL_TENSOR.DEC_ATTN_V: (
|
474
|
+
"decoder.block.{bid}.layer.0.SelfAttention.v", # t5
|
475
|
+
),
|
476
|
+
|
477
|
+
MODEL_TENSOR.DEC_ATTN_OUT: (
|
478
|
+
"decoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
479
|
+
),
|
480
|
+
|
481
|
+
MODEL_TENSOR.DEC_ATTN_REL_B: (
|
482
|
+
"decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
483
|
+
),
|
484
|
+
|
485
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
|
486
|
+
"decoder.block.{bid}.layer.1.layer_norm", # t5
|
487
|
+
),
|
488
|
+
|
489
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
|
490
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
|
491
|
+
),
|
492
|
+
|
493
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_K: (
|
494
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
|
495
|
+
),
|
496
|
+
|
497
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_V: (
|
498
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
|
499
|
+
),
|
500
|
+
|
501
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
|
502
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
|
503
|
+
),
|
504
|
+
|
505
|
+
MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
|
506
|
+
"decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
|
507
|
+
),
|
508
|
+
|
509
|
+
MODEL_TENSOR.DEC_FFN_NORM: (
|
510
|
+
"decoder.block.{bid}.layer.2.layer_norm", # t5
|
511
|
+
),
|
512
|
+
|
513
|
+
MODEL_TENSOR.DEC_FFN_GATE: (
|
514
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
|
515
|
+
),
|
516
|
+
|
517
|
+
MODEL_TENSOR.DEC_FFN_UP: (
|
518
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
|
519
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
|
520
|
+
),
|
521
|
+
|
522
|
+
MODEL_TENSOR.DEC_FFN_DOWN: (
|
523
|
+
"decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
|
524
|
+
),
|
525
|
+
|
526
|
+
MODEL_TENSOR.DEC_OUTPUT_NORM: (
|
527
|
+
"decoder.final_layer_norm", # t5
|
528
|
+
),
|
529
|
+
|
530
|
+
MODEL_TENSOR.ENC_ATTN_NORM: (
|
531
|
+
"encoder.block.{bid}.layer.0.layer_norm", # t5
|
532
|
+
),
|
533
|
+
|
534
|
+
MODEL_TENSOR.ENC_ATTN_Q: (
|
535
|
+
"encoder.block.{bid}.layer.0.SelfAttention.q", # t5
|
536
|
+
),
|
537
|
+
|
538
|
+
MODEL_TENSOR.ENC_ATTN_K: (
|
539
|
+
"encoder.block.{bid}.layer.0.SelfAttention.k", # t5
|
540
|
+
),
|
541
|
+
|
542
|
+
MODEL_TENSOR.ENC_ATTN_V: (
|
543
|
+
"encoder.block.{bid}.layer.0.SelfAttention.v", # t5
|
544
|
+
),
|
545
|
+
|
546
|
+
MODEL_TENSOR.ENC_ATTN_OUT: (
|
547
|
+
"encoder.block.{bid}.layer.0.SelfAttention.o", # t5
|
548
|
+
),
|
549
|
+
|
550
|
+
MODEL_TENSOR.ENC_ATTN_REL_B: (
|
551
|
+
"encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
|
552
|
+
),
|
553
|
+
|
554
|
+
MODEL_TENSOR.ENC_FFN_NORM: (
|
555
|
+
"encoder.block.{bid}.layer.1.layer_norm", # t5
|
556
|
+
),
|
557
|
+
|
558
|
+
MODEL_TENSOR.ENC_FFN_GATE: (
|
559
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
|
560
|
+
),
|
561
|
+
|
562
|
+
MODEL_TENSOR.ENC_FFN_UP: (
|
563
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
|
564
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
|
565
|
+
),
|
566
|
+
|
567
|
+
MODEL_TENSOR.ENC_FFN_DOWN: (
|
568
|
+
"encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
|
569
|
+
),
|
570
|
+
|
571
|
+
MODEL_TENSOR.ENC_OUTPUT_NORM: (
|
572
|
+
"encoder.final_layer_norm", # t5
|
573
|
+
),
|
386
574
|
}
|
387
575
|
|
388
576
|
# architecture-specific block mappings
|
@@ -414,14 +602,12 @@ class TensorNameMap:
|
|
414
602
|
for tensor, keys in self.block_mappings_cfg.items():
|
415
603
|
if tensor not in MODEL_TENSORS[arch]:
|
416
604
|
continue
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
key = key.format(bid = bid, xid = xid)
|
424
|
-
self.mapping[key] = (tensor, tensor_name)
|
605
|
+
|
606
|
+
tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
|
607
|
+
self.mapping[tensor_name] = (tensor, tensor_name)
|
608
|
+
for key in keys:
|
609
|
+
key = key.format(bid = bid)
|
610
|
+
self.mapping[key] = (tensor, tensor_name)
|
425
611
|
|
426
612
|
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
|
427
613
|
result = self.mapping.get(key)
|
@@ -460,4 +646,4 @@ class TensorNameMap:
|
|
460
646
|
|
461
647
|
|
462
648
|
def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
|
463
|
-
return TensorNameMap(arch, n_blocks)
|
649
|
+
return TensorNameMap(arch, n_blocks)
|
bigdl/cpp/libs/baby-llama.exe
CHANGED
Binary file
|
bigdl/cpp/libs/batched-bench.exe
CHANGED
Binary file
|
bigdl/cpp/libs/batched.exe
CHANGED
Binary file
|
bigdl/cpp/libs/beam-search.exe
CHANGED
Binary file
|
bigdl/cpp/libs/benchmark.exe
CHANGED
Binary file
|
bigdl/cpp/libs/common.lib
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
bigdl/cpp/libs/embedding.exe
CHANGED
Binary file
|
bigdl/cpp/libs/export-lora.exe
CHANGED
Binary file
|
bigdl/cpp/libs/finetune.exe
CHANGED
Binary file
|
bigdl/cpp/libs/ggml_shared.dll
CHANGED
Binary file
|
bigdl/cpp/libs/gguf.exe
CHANGED
Binary file
|
bigdl/cpp/libs/gritlm.exe
CHANGED
Binary file
|
bigdl/cpp/libs/imatrix.exe
CHANGED
Binary file
|
bigdl/cpp/libs/infill.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
|
bigdl/cpp/libs/llava-cli.exe
CHANGED
Binary file
|
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
|
bigdl/cpp/libs/lookahead.exe
CHANGED
Binary file
|
bigdl/cpp/libs/lookup.exe
CHANGED
Binary file
|
Binary file
|
bigdl/cpp/libs/main.exe
CHANGED
Binary file
|
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
|