bigdl-core-cpp 2.5.0b20240724__py3-none-win_amd64.whl → 2.5.0b20240726__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. bigdl/cpp/convert-hf-to-gguf.py +1148 -315
  2. bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
  3. bigdl/cpp/gguf-py/gguf/constants.py +463 -167
  4. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +475 -156
  7. bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
  8. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +209 -23
  9. bigdl/cpp/libs/baby-llama.exe +0 -0
  10. bigdl/cpp/libs/batched-bench.exe +0 -0
  11. bigdl/cpp/libs/batched.exe +0 -0
  12. bigdl/cpp/libs/beam-search.exe +0 -0
  13. bigdl/cpp/libs/benchmark.exe +0 -0
  14. bigdl/cpp/libs/common.lib +0 -0
  15. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  16. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  17. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  18. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  19. bigdl/cpp/libs/embedding.exe +0 -0
  20. bigdl/cpp/libs/export-lora.exe +0 -0
  21. bigdl/cpp/libs/finetune.exe +0 -0
  22. bigdl/cpp/libs/ggml_shared.dll +0 -0
  23. bigdl/cpp/libs/gguf.exe +0 -0
  24. bigdl/cpp/libs/gritlm.exe +0 -0
  25. bigdl/cpp/libs/imatrix.exe +0 -0
  26. bigdl/cpp/libs/infill.exe +0 -0
  27. bigdl/cpp/libs/llama-bench.exe +0 -0
  28. bigdl/cpp/libs/llama.dll +0 -0
  29. bigdl/cpp/libs/llava-cli.exe +0 -0
  30. bigdl/cpp/libs/llava_shared.dll +0 -0
  31. bigdl/cpp/libs/lookahead.exe +0 -0
  32. bigdl/cpp/libs/lookup.exe +0 -0
  33. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  34. bigdl/cpp/libs/main.exe +0 -0
  35. bigdl/cpp/libs/ollama.exe +0 -0
  36. bigdl/cpp/libs/parallel.exe +0 -0
  37. bigdl/cpp/libs/passkey.exe +0 -0
  38. bigdl/cpp/libs/perplexity.exe +0 -0
  39. bigdl/cpp/libs/q8dot.exe +0 -0
  40. bigdl/cpp/libs/quantize-stats.exe +0 -0
  41. bigdl/cpp/libs/quantize.exe +0 -0
  42. bigdl/cpp/libs/save-load-state.exe +0 -0
  43. bigdl/cpp/libs/server.exe +0 -0
  44. bigdl/cpp/libs/simple.exe +0 -0
  45. bigdl/cpp/libs/speculative.exe +0 -0
  46. bigdl/cpp/libs/tokenize.exe +0 -0
  47. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  48. bigdl/cpp/libs/vdot.exe +0 -0
  49. {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/METADATA +1 -1
  50. bigdl_core_cpp-2.5.0b20240726.dist-info/RECORD +61 -0
  51. bigdl_core_cpp-2.5.0b20240724.dist-info/RECORD +0 -61
  52. {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-llama-cpp.bat +0 -0
  53. {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-llama-cpp.ps1 +0 -0
  54. {bigdl_core_cpp-2.5.0b20240724.data → bigdl_core_cpp-2.5.0b20240726.data}/scripts/init-ollama.bat +0 -0
  55. {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/WHEEL +0 -0
  56. {bigdl_core_cpp-2.5.0b20240724.dist-info → bigdl_core_cpp-2.5.0b20240726.dist-info}/top_level.txt +0 -0
@@ -3,10 +3,8 @@ from abc import ABC, ABCMeta, abstractmethod
3
3
 
4
4
  import logging
5
5
  from typing import Any, Callable
6
- from collections import deque
7
6
 
8
7
  import numpy as np
9
- from numpy._typing import _Shape
10
8
  from numpy.typing import DTypeLike
11
9
 
12
10
 
@@ -16,16 +14,16 @@ logger = logging.getLogger(__name__)
16
14
  class LazyMeta(ABCMeta):
17
15
 
18
16
  def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
19
- def __getattr__(self, __name: str) -> Any:
20
- meta_attr = getattr(self._meta, __name)
17
+ def __getattr__(self, name: str) -> Any:
18
+ meta_attr = getattr(self._meta, name)
21
19
  if callable(meta_attr):
22
20
  return type(self)._wrap_fn(
23
- (lambda s, *args, **kwargs: getattr(s, __name)(*args, **kwargs)),
21
+ (lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
24
22
  use_self=self,
25
23
  )
26
24
  elif isinstance(meta_attr, self._tensor_type):
27
25
  # e.g. self.T with torch.Tensor should still be wrapped
28
- return type(self)._wrap_fn(lambda s: getattr(s, __name))(self)
26
+ return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
29
27
  else:
30
28
  # no need to wrap non-tensor properties,
31
29
  # and they likely don't depend on the actual contents of the tensor
@@ -75,20 +73,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
75
73
  _tensor_type: type
76
74
  _meta: Any
77
75
  _data: Any | None
78
- _lazy: deque[LazyBase] # shared within a graph, to avoid deep recursion when making eager
79
76
  _args: tuple
80
- _func: Callable[[tuple], Any] | None
77
+ _kwargs: dict[str, Any]
78
+ _func: Callable[[Any], Any] | None
81
79
 
82
- def __init__(self, *, meta: Any, data: Any | None = None, lazy: deque[LazyBase] | None = None, args: tuple = (), func: Callable[[tuple], Any] | None = None):
80
+ def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
83
81
  super().__init__()
84
82
  self._meta = meta
85
83
  self._data = data
86
- self._lazy = lazy if lazy is not None else deque()
87
84
  self._args = args
85
+ self._kwargs = kwargs if kwargs is not None else {}
88
86
  self._func = func
89
87
  assert self._func is not None or self._data is not None
90
- if self._data is None:
91
- self._lazy.append(self)
92
88
 
93
89
  def __init_subclass__(cls) -> None:
94
90
  if "_tensor_type" not in cls.__dict__:
@@ -118,6 +114,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
118
114
  args = ((use_self,) if use_self is not None else ()) + args
119
115
 
120
116
  meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
117
+ # TODO: maybe handle tensors in kwargs too
121
118
 
122
119
  if isinstance(meta_noop, bool) and not meta_noop:
123
120
  try:
@@ -141,21 +138,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
141
138
  res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
142
139
 
143
140
  if isinstance(res, cls._tensor_type):
144
- def collect_replace(t: LazyBase):
145
- if collect_replace.shared_lazy is None:
146
- collect_replace.shared_lazy = t._lazy
147
- else:
148
- collect_replace.shared_lazy.extend(t._lazy)
149
- t._lazy = collect_replace.shared_lazy
150
-
151
- # emulating a static variable
152
- collect_replace.shared_lazy = None
153
-
154
- LazyBase._recurse_apply(args, collect_replace)
155
-
156
- shared_lazy = collect_replace.shared_lazy
157
-
158
- return cls(meta=cls.eager_to_meta(res), lazy=shared_lazy, args=args, func=lambda a: fn(*a, **kwargs))
141
+ return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
159
142
  else:
160
143
  del res # not needed
161
144
  # non-tensor return likely relies on the contents of the args
@@ -167,25 +150,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
167
150
  @classmethod
168
151
  def to_eager(cls, t: Any) -> Any:
169
152
  def simple_to_eager(_t: LazyBase) -> Any:
170
- def already_eager_to_eager(_t: LazyBase) -> Any:
171
- assert _t._data is not None
153
+ if _t._data is not None:
172
154
  return _t._data
173
155
 
174
- while _t._data is None:
175
- lt = _t._lazy.popleft()
176
- if lt._data is not None:
177
- # Lazy tensor did not belong in the lazy queue.
178
- # Weirdly only happens with Bloom models...
179
- # likely because tensors aren't unique in the queue.
180
- # The final output is still the same as in eager mode,
181
- # so it's safe to ignore this.
182
- continue
183
- assert lt._func is not None
184
- lt._args = cls._recurse_apply(lt._args, already_eager_to_eager)
185
- lt._data = lt._func(lt._args)
186
- # sanity check
187
- assert lt._data.dtype == lt._meta.dtype
188
- assert lt._data.shape == lt._meta.shape
156
+ # NOTE: there's a recursion limit in Python (usually 1000)
157
+
158
+ assert _t._func is not None
159
+ _t._args = cls._recurse_apply(_t._args, simple_to_eager)
160
+ _t._data = _t._func(*_t._args, **_t._kwargs)
161
+ # sanity check
162
+ assert _t._data is not None
163
+ assert _t._data.dtype == _t._meta.dtype
164
+ assert _t._data.shape == _t._meta.shape
189
165
 
190
166
  return _t._data
191
167
 
@@ -204,7 +180,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
204
180
  @classmethod
205
181
  def from_eager(cls, t: Any) -> Any:
206
182
  if type(t) is cls:
207
- # already eager
183
+ # already lazy
208
184
  return t
209
185
  elif isinstance(t, cls._tensor_type):
210
186
  return cls(meta=cls.eager_to_meta(t), data=t)
@@ -216,7 +192,7 @@ class LazyNumpyTensor(LazyBase):
216
192
  _tensor_type = np.ndarray
217
193
 
218
194
  @classmethod
219
- def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: _Shape) -> np.ndarray[Any, Any]:
195
+ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
220
196
  # The initial idea was to use np.nan as the fill value,
221
197
  # but non-float types like np.int16 can't use that.
222
198
  # So zero it is.
@@ -226,11 +202,10 @@ class LazyNumpyTensor(LazyBase):
226
202
  def astype(self, dtype, *args, **kwargs):
227
203
  meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
228
204
  full_args = (self, dtype,) + args
229
- # very important to pass the shared _lazy deque, or else there's an infinite loop somewhere.
230
- return type(self)(meta=meta, args=full_args, lazy=self._lazy, func=(lambda a: a[0].astype(*a[1:], **kwargs)))
205
+ return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
231
206
 
232
207
  def tofile(self, *args, **kwargs):
233
208
  eager = LazyNumpyTensor.to_eager(self)
234
209
  return eager.tofile(*args, **kwargs)
235
210
 
236
- # TODO: __array_function__
211
+ # TODO: __array_function__
@@ -10,7 +10,7 @@ class TensorNameMap:
10
10
  # Token embeddings
11
11
  MODEL_TENSOR.TOKEN_EMBD: (
12
12
  "gpt_neox.embed_in", # gptneox
13
- "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
13
+ "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
14
14
  "transformer.word_embeddings", # falcon
15
15
  "word_embeddings", # bloom
16
16
  "model.embed_tokens", # llama-hf
@@ -24,6 +24,9 @@ class TensorNameMap:
24
24
  "backbone.embedding", # mamba
25
25
  "backbone.embeddings", # mamba-hf
26
26
  "transformer.in_out_embed", # Grok
27
+ "embedding.word_embeddings", # chatglm
28
+ "transformer.token_embeddings", # openelm
29
+ "shared", # t5
27
30
  ),
28
31
 
29
32
  # Token type embeddings
@@ -36,6 +39,7 @@ class TensorNameMap:
36
39
  "word_embeddings_layernorm", # bloom
37
40
  "embeddings.LayerNorm", # bert
38
41
  "emb_ln", # nomic-bert
42
+ "transformer.norm", # openelm
39
43
  ),
40
44
 
41
45
  # Position embeddings
@@ -48,16 +52,17 @@ class TensorNameMap:
48
52
  # Output
49
53
  MODEL_TENSOR.OUTPUT: (
50
54
  "embed_out", # gptneox
51
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
55
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
52
56
  "output", # llama-pth bloom internlm2
53
57
  "word_embeddings_for_head", # persimmon
54
58
  "lm_head.linear", # phi2
59
+ "output_layer", # chatglm
55
60
  ),
56
61
 
57
62
  # Output norm
58
63
  MODEL_TENSOR.OUTPUT_NORM: (
59
64
  "gpt_neox.final_layer_norm", # gptneox
60
- "transformer.ln_f", # gpt2 gpt-j falcon
65
+ "transformer.ln_f", # gpt2 gpt-j falcon jais
61
66
  "model.norm", # llama-hf baichuan internlm2
62
67
  "norm", # llama-pth
63
68
  "transformer.norm_f", # mpt dbrx
@@ -68,11 +73,14 @@ class TensorNameMap:
68
73
  "model.norm_f", # mamba-qbert
69
74
  "backbone.norm_f", # mamba
70
75
  "transformer.rms_norm", # Grok
76
+ "encoder.final_layernorm", # chatglm
77
+ "transformer.norm", # openelm
71
78
  ),
72
79
 
73
80
  # Rope frequencies
74
81
  MODEL_TENSOR.ROPE_FREQS: (
75
82
  "rope.freqs", # llama-pth
83
+ "rotary_pos_emb.inv_freq", # chatglm
76
84
  ),
77
85
  }
78
86
 
@@ -80,7 +88,7 @@ class TensorNameMap:
80
88
  # Attention norm
81
89
  MODEL_TENSOR.ATTN_NORM: (
82
90
  "gpt_neox.layers.{bid}.input_layernorm", # gptneox
83
- "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
91
+ "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
84
92
  "transformer.blocks.{bid}.norm_1", # mpt
85
93
  "transformer.h.{bid}.input_layernorm", # falcon7b
86
94
  "h.{bid}.input_layernorm", # bloom
@@ -97,17 +105,20 @@ class TensorNameMap:
97
105
  "backbone.layers.{bid}.norm", # mamba
98
106
  "transformer.decoder_layer.{bid}.rms_norm", # Grok
99
107
  "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
108
+ "encoder.layers.{bid}.input_layernorm", # chatglm
109
+ "transformer.layers.{bid}.attn_norm", # openelm
100
110
  ),
101
111
 
102
112
  # Attention norm 2
103
113
  MODEL_TENSOR.ATTN_NORM_2: (
104
114
  "transformer.h.{bid}.ln_attn", # falcon40b
115
+ "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
105
116
  ),
106
117
 
107
118
  # Attention query-key-value
108
119
  MODEL_TENSOR.ATTN_QKV: (
109
120
  "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
110
- "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
121
+ "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
111
122
  "transformer.blocks.{bid}.attn.Wqkv", # mpt
112
123
  "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
113
124
  "transformer.h.{bid}.self_attention.query_key_value", # falcon
@@ -117,7 +128,9 @@ class TensorNameMap:
117
128
  "h.{bid}.attn.c_attn", # gpt2
118
129
  "transformer.h.{bid}.mixer.Wqkv", # phi2
119
130
  "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
120
- "model.layers.{bid}.self_attn.qkv_proj" # phi3
131
+ "model.layers.{bid}.self_attn.qkv_proj", # phi3
132
+ "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
133
+ "transformer.layers.{bid}.attn.qkv_proj", # openelm
121
134
  ),
122
135
 
123
136
  # Attention query
@@ -128,7 +141,7 @@ class TensorNameMap:
128
141
  "transformer.h.{bid}.attn.q_proj", # gpt-j
129
142
  "model.layers.layers.{bid}.self_attn.q_proj", # plamo
130
143
  "model.layers.{bid}.attention.wq", # internlm2
131
- "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
144
+ "transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
132
145
  ),
133
146
 
134
147
  # Attention key
@@ -140,7 +153,7 @@ class TensorNameMap:
140
153
  "transformer.h.{bid}.attn.k", # refact
141
154
  "model.layers.layers.{bid}.self_attn.k_proj", # plamo
142
155
  "model.layers.{bid}.attention.wk", # internlm2
143
- "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
156
+ "transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
144
157
  ),
145
158
 
146
159
  # Attention value
@@ -158,7 +171,7 @@ class TensorNameMap:
158
171
  # Attention output
159
172
  MODEL_TENSOR.ATTN_OUT: (
160
173
  "gpt_neox.layers.{bid}.attention.dense", # gptneox
161
- "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
174
+ "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
162
175
  "transformer.blocks.{bid}.attn.out_proj", # mpt
163
176
  "transformer.h.{bid}.self_attention.dense", # falcon
164
177
  "h.{bid}.self_attention.dense", # bloom
@@ -175,6 +188,8 @@ class TensorNameMap:
175
188
  "encoder.layers.{bid}.attn.out_proj", # nomic-bert
176
189
  "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
177
190
  "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
191
+ "encoder.layers.{bid}.self_attention.dense", # chatglm
192
+ "transformer.layers.{bid}.attn.out_proj", # openelm
178
193
  ),
179
194
 
180
195
  # Attention output norm
@@ -185,6 +200,10 @@ class TensorNameMap:
185
200
  "transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
186
201
  ),
187
202
 
203
+ MODEL_TENSOR.ATTN_POST_NORM: (
204
+ "model.layers.{bid}.post_attention_layernorm", # gemma2
205
+ ),
206
+
188
207
  # Rotary embeddings
189
208
  MODEL_TENSOR.ATTN_ROT_EMBD: (
190
209
  "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@@ -196,7 +215,7 @@ class TensorNameMap:
196
215
  # Feed-forward norm
197
216
  MODEL_TENSOR.FFN_NORM: (
198
217
  "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
199
- "transformer.h.{bid}.ln_2", # gpt2 refact qwen
218
+ "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
200
219
  "h.{bid}.post_attention_layernorm", # bloom
201
220
  "transformer.blocks.{bid}.norm_2", # mpt
202
221
  "model.layers.{bid}.post_attention_layernorm", # llama-hf
@@ -206,6 +225,18 @@ class TensorNameMap:
206
225
  "h.{bid}.ln_2", # gpt2
207
226
  "model.layers.{bid}.ffn_norm", # internlm2
208
227
  "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
228
+ "encoder.layers.{bid}.post_attention_layernorm", # chatglm
229
+ "transformer.layers.{bid}.ffn_norm", # openelm
230
+ ),
231
+
232
+ # Pre feed-forward norm
233
+ MODEL_TENSOR.FFN_PRE_NORM: (
234
+ "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
235
+ ),
236
+
237
+ # Post feed-forward norm
238
+ MODEL_TENSOR.FFN_POST_NORM: (
239
+ "model.layers.{bid}.post_feedforward_layernorm", # gemma2
209
240
  ),
210
241
 
211
242
  MODEL_TENSOR.FFN_GATE_INP: (
@@ -223,7 +254,7 @@ class TensorNameMap:
223
254
  # Feed-forward up
224
255
  MODEL_TENSOR.FFN_UP: (
225
256
  "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
226
- "transformer.h.{bid}.mlp.c_fc", # gpt2
257
+ "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
227
258
  "transformer.blocks.{bid}.ffn.up_proj", # mpt
228
259
  "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
229
260
  "h.{bid}.mlp.dense_h_to_4h", # bloom
@@ -245,6 +276,7 @@ class TensorNameMap:
245
276
  "model.layers.{bid}.mlp.c_fc", # starcoder2
246
277
  "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
247
278
  "model.layers.{bid}.residual_mlp.w3", # arctic
279
+ "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
248
280
  ),
249
281
 
250
282
  MODEL_TENSOR.FFN_UP_EXP: (
@@ -256,6 +288,7 @@ class TensorNameMap:
256
288
 
257
289
  MODEL_TENSOR.FFN_UP_SHEXP: (
258
290
  "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
291
+ "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
259
292
  ),
260
293
 
261
294
  # AWQ-activation gate
@@ -268,6 +301,7 @@ class TensorNameMap:
268
301
  "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
269
302
  "layers.{bid}.feed_forward.w1", # llama-pth
270
303
  "transformer.h.{bid}.mlp.w2", # qwen
304
+ "transformer.h.{bid}.mlp.c_fc2", # jais
271
305
  "model.layers.layers.{bid}.mlp.gate_proj", # plamo
272
306
  "model.layers.{bid}.feed_forward.w1", # internlm2
273
307
  "encoder.layers.{bid}.mlp.fc12", # nomic-bert
@@ -285,12 +319,13 @@ class TensorNameMap:
285
319
 
286
320
  MODEL_TENSOR.FFN_GATE_SHEXP: (
287
321
  "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
322
+ "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
288
323
  ),
289
324
 
290
325
  # Feed-forward down
291
326
  MODEL_TENSOR.FFN_DOWN: (
292
327
  "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
293
- "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
328
+ "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
294
329
  "transformer.blocks.{bid}.ffn.down_proj", # mpt
295
330
  "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
296
331
  "h.{bid}.mlp.dense_4h_to_h", # bloom
@@ -308,7 +343,10 @@ class TensorNameMap:
308
343
  "encoder.layers.{bid}.mlp.fc2", # nomic-bert
309
344
  "model.layers.{bid}.mlp.c_proj", # starcoder2
310
345
  "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
346
+ "transformer.layers.{bid}.ffn.proj_2", # openelm
311
347
  "model.layers.{bid}.residual_mlp.w2", # arctic
348
+ "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
349
+ "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
312
350
  ),
313
351
 
314
352
  MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -320,6 +358,7 @@ class TensorNameMap:
320
358
 
321
359
  MODEL_TENSOR.FFN_DOWN_SHEXP: (
322
360
  "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
361
+ "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
323
362
  ),
324
363
 
325
364
  MODEL_TENSOR.ATTN_Q_NORM: (
@@ -327,7 +366,8 @@ class TensorNameMap:
327
366
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
328
367
  "model.layers.{bid}.self_attn.q_norm", # cohere
329
368
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
330
- "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
369
+ "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
370
+ "transformer.layers.{bid}.attn.q_norm", # openelm
331
371
  ),
332
372
 
333
373
  MODEL_TENSOR.ATTN_K_NORM: (
@@ -335,7 +375,8 @@ class TensorNameMap:
335
375
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
336
376
  "model.layers.{bid}.self_attn.k_norm", # cohere
337
377
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
338
- "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
378
+ "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
379
+ "transformer.layers.{bid}.attn.k_norm", # openelm
339
380
  ),
340
381
 
341
382
  MODEL_TENSOR.ROPE_FREQS: (
@@ -347,6 +388,7 @@ class TensorNameMap:
347
388
  "encoder.layers.{bid}.norm2", # nomic-bert
348
389
  "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
349
390
  "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
391
+ "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
350
392
  ),
351
393
 
352
394
  MODEL_TENSOR.SSM_IN: (
@@ -383,6 +425,152 @@ class TensorNameMap:
383
425
  "model.layers.{bid}.out_proj",
384
426
  "backbone.layers.{bid}.mixer.out_proj",
385
427
  ),
428
+
429
+ MODEL_TENSOR.ATTN_Q_A: (
430
+ "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
431
+ ),
432
+
433
+ MODEL_TENSOR.ATTN_Q_B: (
434
+ "model.layers.{bid}.self_attn.q_b_proj", # deepseek2
435
+ ),
436
+
437
+ MODEL_TENSOR.ATTN_KV_A_MQA: (
438
+ "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
439
+ ),
440
+
441
+ MODEL_TENSOR.ATTN_KV_B: (
442
+ "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
443
+ ),
444
+
445
+ MODEL_TENSOR.ATTN_Q_A_NORM: (
446
+ "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
447
+ ),
448
+
449
+ MODEL_TENSOR.ATTN_KV_A_NORM: (
450
+ "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
451
+ ),
452
+
453
+ MODEL_TENSOR.ATTN_SUB_NORM: (
454
+ "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
455
+ ),
456
+
457
+ MODEL_TENSOR.FFN_SUB_NORM: (
458
+ "model.layers.{bid}.mlp.ffn_layernorm", # bitnet
459
+ ),
460
+
461
+ MODEL_TENSOR.DEC_ATTN_NORM: (
462
+ "decoder.block.{bid}.layer.0.layer_norm", # t5
463
+ ),
464
+
465
+ MODEL_TENSOR.DEC_ATTN_Q: (
466
+ "decoder.block.{bid}.layer.0.SelfAttention.q", # t5
467
+ ),
468
+
469
+ MODEL_TENSOR.DEC_ATTN_K: (
470
+ "decoder.block.{bid}.layer.0.SelfAttention.k", # t5
471
+ ),
472
+
473
+ MODEL_TENSOR.DEC_ATTN_V: (
474
+ "decoder.block.{bid}.layer.0.SelfAttention.v", # t5
475
+ ),
476
+
477
+ MODEL_TENSOR.DEC_ATTN_OUT: (
478
+ "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
479
+ ),
480
+
481
+ MODEL_TENSOR.DEC_ATTN_REL_B: (
482
+ "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
483
+ ),
484
+
485
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
486
+ "decoder.block.{bid}.layer.1.layer_norm", # t5
487
+ ),
488
+
489
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
490
+ "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
491
+ ),
492
+
493
+ MODEL_TENSOR.DEC_CROSS_ATTN_K: (
494
+ "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
495
+ ),
496
+
497
+ MODEL_TENSOR.DEC_CROSS_ATTN_V: (
498
+ "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
499
+ ),
500
+
501
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
502
+ "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
503
+ ),
504
+
505
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
506
+ "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
507
+ ),
508
+
509
+ MODEL_TENSOR.DEC_FFN_NORM: (
510
+ "decoder.block.{bid}.layer.2.layer_norm", # t5
511
+ ),
512
+
513
+ MODEL_TENSOR.DEC_FFN_GATE: (
514
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
515
+ ),
516
+
517
+ MODEL_TENSOR.DEC_FFN_UP: (
518
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
519
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
520
+ ),
521
+
522
+ MODEL_TENSOR.DEC_FFN_DOWN: (
523
+ "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
524
+ ),
525
+
526
+ MODEL_TENSOR.DEC_OUTPUT_NORM: (
527
+ "decoder.final_layer_norm", # t5
528
+ ),
529
+
530
+ MODEL_TENSOR.ENC_ATTN_NORM: (
531
+ "encoder.block.{bid}.layer.0.layer_norm", # t5
532
+ ),
533
+
534
+ MODEL_TENSOR.ENC_ATTN_Q: (
535
+ "encoder.block.{bid}.layer.0.SelfAttention.q", # t5
536
+ ),
537
+
538
+ MODEL_TENSOR.ENC_ATTN_K: (
539
+ "encoder.block.{bid}.layer.0.SelfAttention.k", # t5
540
+ ),
541
+
542
+ MODEL_TENSOR.ENC_ATTN_V: (
543
+ "encoder.block.{bid}.layer.0.SelfAttention.v", # t5
544
+ ),
545
+
546
+ MODEL_TENSOR.ENC_ATTN_OUT: (
547
+ "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
548
+ ),
549
+
550
+ MODEL_TENSOR.ENC_ATTN_REL_B: (
551
+ "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
552
+ ),
553
+
554
+ MODEL_TENSOR.ENC_FFN_NORM: (
555
+ "encoder.block.{bid}.layer.1.layer_norm", # t5
556
+ ),
557
+
558
+ MODEL_TENSOR.ENC_FFN_GATE: (
559
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
560
+ ),
561
+
562
+ MODEL_TENSOR.ENC_FFN_UP: (
563
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
564
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
565
+ ),
566
+
567
+ MODEL_TENSOR.ENC_FFN_DOWN: (
568
+ "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
569
+ ),
570
+
571
+ MODEL_TENSOR.ENC_OUTPUT_NORM: (
572
+ "encoder.final_layer_norm", # t5
573
+ ),
386
574
  }
387
575
 
388
576
  # architecture-specific block mappings
@@ -414,14 +602,12 @@ class TensorNameMap:
414
602
  for tensor, keys in self.block_mappings_cfg.items():
415
603
  if tensor not in MODEL_TENSORS[arch]:
416
604
  continue
417
- # TODO: make this configurable
418
- n_experts = 128
419
- for xid in range(n_experts):
420
- tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
421
- self.mapping[tensor_name] = (tensor, tensor_name)
422
- for key in keys:
423
- key = key.format(bid = bid, xid = xid)
424
- self.mapping[key] = (tensor, tensor_name)
605
+
606
+ tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
607
+ self.mapping[tensor_name] = (tensor, tensor_name)
608
+ for key in keys:
609
+ key = key.format(bid = bid)
610
+ self.mapping[key] = (tensor, tensor_name)
425
611
 
426
612
  def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
427
613
  result = self.mapping.get(key)
@@ -460,4 +646,4 @@ class TensorNameMap:
460
646
 
461
647
 
462
648
  def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
463
- return TensorNameMap(arch, n_blocks)
649
+ return TensorNameMap(arch, n_blocks)
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/common.lib CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/gguf.exe CHANGED
Binary file
bigdl/cpp/libs/gritlm.exe CHANGED
Binary file
Binary file
bigdl/cpp/libs/infill.exe CHANGED
Binary file
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/lookup.exe CHANGED
Binary file
Binary file
bigdl/cpp/libs/main.exe CHANGED
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file