bigdl-core-cpp 2.5.0b20240725__py3-none-win_amd64.whl → 2.5.0b20240727__py3-none-win_amd64.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. bigdl/cpp/convert-hf-to-gguf.py +1106 -320
  2. bigdl/cpp/gguf-py/gguf/__init__.py +2 -0
  3. bigdl/cpp/gguf-py/gguf/constants.py +442 -173
  4. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +29 -8
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +472 -156
  7. bigdl/cpp/gguf-py/gguf/lazy.py +24 -49
  8. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +195 -23
  9. bigdl/cpp/libs/baby-llama.exe +0 -0
  10. bigdl/cpp/libs/batched-bench.exe +0 -0
  11. bigdl/cpp/libs/batched.exe +0 -0
  12. bigdl/cpp/libs/beam-search.exe +0 -0
  13. bigdl/cpp/libs/benchmark.exe +0 -0
  14. bigdl/cpp/libs/common.lib +0 -0
  15. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  16. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  17. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  18. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  19. bigdl/cpp/libs/embedding.exe +0 -0
  20. bigdl/cpp/libs/export-lora.exe +0 -0
  21. bigdl/cpp/libs/finetune.exe +0 -0
  22. bigdl/cpp/libs/ggml_shared.dll +0 -0
  23. bigdl/cpp/libs/gguf.exe +0 -0
  24. bigdl/cpp/libs/gritlm.exe +0 -0
  25. bigdl/cpp/libs/imatrix.exe +0 -0
  26. bigdl/cpp/libs/infill.exe +0 -0
  27. bigdl/cpp/libs/llama-bench.exe +0 -0
  28. bigdl/cpp/libs/llama.dll +0 -0
  29. bigdl/cpp/libs/llava-cli.exe +0 -0
  30. bigdl/cpp/libs/llava_shared.dll +0 -0
  31. bigdl/cpp/libs/lookahead.exe +0 -0
  32. bigdl/cpp/libs/lookup.exe +0 -0
  33. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  34. bigdl/cpp/libs/main.exe +0 -0
  35. bigdl/cpp/libs/ollama.exe +0 -0
  36. bigdl/cpp/libs/parallel.exe +0 -0
  37. bigdl/cpp/libs/passkey.exe +0 -0
  38. bigdl/cpp/libs/perplexity.exe +0 -0
  39. bigdl/cpp/libs/q8dot.exe +0 -0
  40. bigdl/cpp/libs/quantize-stats.exe +0 -0
  41. bigdl/cpp/libs/quantize.exe +0 -0
  42. bigdl/cpp/libs/save-load-state.exe +0 -0
  43. bigdl/cpp/libs/server.exe +0 -0
  44. bigdl/cpp/libs/simple.exe +0 -0
  45. bigdl/cpp/libs/speculative.exe +0 -0
  46. bigdl/cpp/libs/tokenize.exe +0 -0
  47. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  48. bigdl/cpp/libs/vdot.exe +0 -0
  49. {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/METADATA +1 -1
  50. bigdl_core_cpp-2.5.0b20240727.dist-info/RECORD +61 -0
  51. bigdl_core_cpp-2.5.0b20240725.dist-info/RECORD +0 -61
  52. {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-llama-cpp.bat +0 -0
  53. {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-llama-cpp.ps1 +0 -0
  54. {bigdl_core_cpp-2.5.0b20240725.data → bigdl_core_cpp-2.5.0b20240727.data}/scripts/init-ollama.bat +0 -0
  55. {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/WHEEL +0 -0
  56. {bigdl_core_cpp-2.5.0b20240725.dist-info → bigdl_core_cpp-2.5.0b20240727.dist-info}/top_level.txt +0 -0
@@ -3,10 +3,8 @@ from abc import ABC, ABCMeta, abstractmethod
3
3
 
4
4
  import logging
5
5
  from typing import Any, Callable
6
- from collections import deque
7
6
 
8
7
  import numpy as np
9
- from numpy._typing import _Shape
10
8
  from numpy.typing import DTypeLike
11
9
 
12
10
 
@@ -16,16 +14,16 @@ logger = logging.getLogger(__name__)
16
14
  class LazyMeta(ABCMeta):
17
15
 
18
16
  def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
19
- def __getattr__(self, __name: str) -> Any:
20
- meta_attr = getattr(self._meta, __name)
17
+ def __getattr__(self, name: str) -> Any:
18
+ meta_attr = getattr(self._meta, name)
21
19
  if callable(meta_attr):
22
20
  return type(self)._wrap_fn(
23
- (lambda s, *args, **kwargs: getattr(s, __name)(*args, **kwargs)),
21
+ (lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
24
22
  use_self=self,
25
23
  )
26
24
  elif isinstance(meta_attr, self._tensor_type):
27
25
  # e.g. self.T with torch.Tensor should still be wrapped
28
- return type(self)._wrap_fn(lambda s: getattr(s, __name))(self)
26
+ return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
29
27
  else:
30
28
  # no need to wrap non-tensor properties,
31
29
  # and they likely don't depend on the actual contents of the tensor
@@ -75,20 +73,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
75
73
  _tensor_type: type
76
74
  _meta: Any
77
75
  _data: Any | None
78
- _lazy: deque[LazyBase] # shared within a graph, to avoid deep recursion when making eager
79
76
  _args: tuple
80
- _func: Callable[[tuple], Any] | None
77
+ _kwargs: dict[str, Any]
78
+ _func: Callable[[Any], Any] | None
81
79
 
82
- def __init__(self, *, meta: Any, data: Any | None = None, lazy: deque[LazyBase] | None = None, args: tuple = (), func: Callable[[tuple], Any] | None = None):
80
+ def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
83
81
  super().__init__()
84
82
  self._meta = meta
85
83
  self._data = data
86
- self._lazy = lazy if lazy is not None else deque()
87
84
  self._args = args
85
+ self._kwargs = kwargs if kwargs is not None else {}
88
86
  self._func = func
89
87
  assert self._func is not None or self._data is not None
90
- if self._data is None:
91
- self._lazy.append(self)
92
88
 
93
89
  def __init_subclass__(cls) -> None:
94
90
  if "_tensor_type" not in cls.__dict__:
@@ -118,6 +114,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
118
114
  args = ((use_self,) if use_self is not None else ()) + args
119
115
 
120
116
  meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
117
+ # TODO: maybe handle tensors in kwargs too
121
118
 
122
119
  if isinstance(meta_noop, bool) and not meta_noop:
123
120
  try:
@@ -141,21 +138,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
141
138
  res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)
142
139
 
143
140
  if isinstance(res, cls._tensor_type):
144
- def collect_replace(t: LazyBase):
145
- if collect_replace.shared_lazy is None:
146
- collect_replace.shared_lazy = t._lazy
147
- else:
148
- collect_replace.shared_lazy.extend(t._lazy)
149
- t._lazy = collect_replace.shared_lazy
150
-
151
- # emulating a static variable
152
- collect_replace.shared_lazy = None
153
-
154
- LazyBase._recurse_apply(args, collect_replace)
155
-
156
- shared_lazy = collect_replace.shared_lazy
157
-
158
- return cls(meta=cls.eager_to_meta(res), lazy=shared_lazy, args=args, func=lambda a: fn(*a, **kwargs))
141
+ return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
159
142
  else:
160
143
  del res # not needed
161
144
  # non-tensor return likely relies on the contents of the args
@@ -167,25 +150,18 @@ class LazyBase(ABC, metaclass=LazyMeta):
167
150
  @classmethod
168
151
  def to_eager(cls, t: Any) -> Any:
169
152
  def simple_to_eager(_t: LazyBase) -> Any:
170
- def already_eager_to_eager(_t: LazyBase) -> Any:
171
- assert _t._data is not None
153
+ if _t._data is not None:
172
154
  return _t._data
173
155
 
174
- while _t._data is None:
175
- lt = _t._lazy.popleft()
176
- if lt._data is not None:
177
- # Lazy tensor did not belong in the lazy queue.
178
- # Weirdly only happens with Bloom models...
179
- # likely because tensors aren't unique in the queue.
180
- # The final output is still the same as in eager mode,
181
- # so it's safe to ignore this.
182
- continue
183
- assert lt._func is not None
184
- lt._args = cls._recurse_apply(lt._args, already_eager_to_eager)
185
- lt._data = lt._func(lt._args)
186
- # sanity check
187
- assert lt._data.dtype == lt._meta.dtype
188
- assert lt._data.shape == lt._meta.shape
156
+ # NOTE: there's a recursion limit in Python (usually 1000)
157
+
158
+ assert _t._func is not None
159
+ _t._args = cls._recurse_apply(_t._args, simple_to_eager)
160
+ _t._data = _t._func(*_t._args, **_t._kwargs)
161
+ # sanity check
162
+ assert _t._data is not None
163
+ assert _t._data.dtype == _t._meta.dtype
164
+ assert _t._data.shape == _t._meta.shape
189
165
 
190
166
  return _t._data
191
167
 
@@ -204,7 +180,7 @@ class LazyBase(ABC, metaclass=LazyMeta):
204
180
  @classmethod
205
181
  def from_eager(cls, t: Any) -> Any:
206
182
  if type(t) is cls:
207
- # already eager
183
+ # already lazy
208
184
  return t
209
185
  elif isinstance(t, cls._tensor_type):
210
186
  return cls(meta=cls.eager_to_meta(t), data=t)
@@ -216,7 +192,7 @@ class LazyNumpyTensor(LazyBase):
216
192
  _tensor_type = np.ndarray
217
193
 
218
194
  @classmethod
219
- def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: _Shape) -> np.ndarray[Any, Any]:
195
+ def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
220
196
  # The initial idea was to use np.nan as the fill value,
221
197
  # but non-float types like np.int16 can't use that.
222
198
  # So zero it is.
@@ -226,11 +202,10 @@ class LazyNumpyTensor(LazyBase):
226
202
  def astype(self, dtype, *args, **kwargs):
227
203
  meta = type(self).meta_with_dtype_and_shape(dtype, self._meta.shape)
228
204
  full_args = (self, dtype,) + args
229
- # very important to pass the shared _lazy deque, or else there's an infinite loop somewhere.
230
- return type(self)(meta=meta, args=full_args, lazy=self._lazy, func=(lambda a: a[0].astype(*a[1:], **kwargs)))
205
+ return type(self)(meta=meta, args=full_args, kwargs=kwargs, func=(lambda a, *args, **kwargs: a.astype(*args, **kwargs)))
231
206
 
232
207
  def tofile(self, *args, **kwargs):
233
208
  eager = LazyNumpyTensor.to_eager(self)
234
209
  return eager.tofile(*args, **kwargs)
235
210
 
236
- # TODO: __array_function__
211
+ # TODO: __array_function__
@@ -10,7 +10,7 @@ class TensorNameMap:
10
10
  # Token embeddings
11
11
  MODEL_TENSOR.TOKEN_EMBD: (
12
12
  "gpt_neox.embed_in", # gptneox
13
- "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
13
+ "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
14
14
  "transformer.word_embeddings", # falcon
15
15
  "word_embeddings", # bloom
16
16
  "model.embed_tokens", # llama-hf
@@ -24,6 +24,9 @@ class TensorNameMap:
24
24
  "backbone.embedding", # mamba
25
25
  "backbone.embeddings", # mamba-hf
26
26
  "transformer.in_out_embed", # Grok
27
+ "embedding.word_embeddings", # chatglm
28
+ "transformer.token_embeddings", # openelm
29
+ "shared", # t5
27
30
  ),
28
31
 
29
32
  # Token type embeddings
@@ -36,6 +39,7 @@ class TensorNameMap:
36
39
  "word_embeddings_layernorm", # bloom
37
40
  "embeddings.LayerNorm", # bert
38
41
  "emb_ln", # nomic-bert
42
+ "transformer.norm", # openelm
39
43
  ),
40
44
 
41
45
  # Position embeddings
@@ -48,16 +52,17 @@ class TensorNameMap:
48
52
  # Output
49
53
  MODEL_TENSOR.OUTPUT: (
50
54
  "embed_out", # gptneox
51
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
55
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
52
56
  "output", # llama-pth bloom internlm2
53
57
  "word_embeddings_for_head", # persimmon
54
58
  "lm_head.linear", # phi2
59
+ "output_layer", # chatglm
55
60
  ),
56
61
 
57
62
  # Output norm
58
63
  MODEL_TENSOR.OUTPUT_NORM: (
59
64
  "gpt_neox.final_layer_norm", # gptneox
60
- "transformer.ln_f", # gpt2 gpt-j falcon
65
+ "transformer.ln_f", # gpt2 gpt-j falcon jais
61
66
  "model.norm", # llama-hf baichuan internlm2
62
67
  "norm", # llama-pth
63
68
  "transformer.norm_f", # mpt dbrx
@@ -68,11 +73,14 @@ class TensorNameMap:
68
73
  "model.norm_f", # mamba-qbert
69
74
  "backbone.norm_f", # mamba
70
75
  "transformer.rms_norm", # Grok
76
+ "encoder.final_layernorm", # chatglm
77
+ "transformer.norm", # openelm
71
78
  ),
72
79
 
73
80
  # Rope frequencies
74
81
  MODEL_TENSOR.ROPE_FREQS: (
75
82
  "rope.freqs", # llama-pth
83
+ "rotary_pos_emb.inv_freq", # chatglm
76
84
  ),
77
85
  }
78
86
 
@@ -80,7 +88,7 @@ class TensorNameMap:
80
88
  # Attention norm
81
89
  MODEL_TENSOR.ATTN_NORM: (
82
90
  "gpt_neox.layers.{bid}.input_layernorm", # gptneox
83
- "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
91
+ "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
84
92
  "transformer.blocks.{bid}.norm_1", # mpt
85
93
  "transformer.h.{bid}.input_layernorm", # falcon7b
86
94
  "h.{bid}.input_layernorm", # bloom
@@ -97,17 +105,20 @@ class TensorNameMap:
97
105
  "backbone.layers.{bid}.norm", # mamba
98
106
  "transformer.decoder_layer.{bid}.rms_norm", # Grok
99
107
  "transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
108
+ "encoder.layers.{bid}.input_layernorm", # chatglm
109
+ "transformer.layers.{bid}.attn_norm", # openelm
100
110
  ),
101
111
 
102
112
  # Attention norm 2
103
113
  MODEL_TENSOR.ATTN_NORM_2: (
104
114
  "transformer.h.{bid}.ln_attn", # falcon40b
115
+ "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
105
116
  ),
106
117
 
107
118
  # Attention query-key-value
108
119
  MODEL_TENSOR.ATTN_QKV: (
109
120
  "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
110
- "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
121
+ "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
111
122
  "transformer.blocks.{bid}.attn.Wqkv", # mpt
112
123
  "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
113
124
  "transformer.h.{bid}.self_attention.query_key_value", # falcon
@@ -117,7 +128,9 @@ class TensorNameMap:
117
128
  "h.{bid}.attn.c_attn", # gpt2
118
129
  "transformer.h.{bid}.mixer.Wqkv", # phi2
119
130
  "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
120
- "model.layers.{bid}.self_attn.qkv_proj" # phi3
131
+ "model.layers.{bid}.self_attn.qkv_proj", # phi3
132
+ "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
133
+ "transformer.layers.{bid}.attn.qkv_proj", # openelm
121
134
  ),
122
135
 
123
136
  # Attention query
@@ -128,7 +141,7 @@ class TensorNameMap:
128
141
  "transformer.h.{bid}.attn.q_proj", # gpt-j
129
142
  "model.layers.layers.{bid}.self_attn.q_proj", # plamo
130
143
  "model.layers.{bid}.attention.wq", # internlm2
131
- "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
144
+ "transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
132
145
  ),
133
146
 
134
147
  # Attention key
@@ -140,7 +153,7 @@ class TensorNameMap:
140
153
  "transformer.h.{bid}.attn.k", # refact
141
154
  "model.layers.layers.{bid}.self_attn.k_proj", # plamo
142
155
  "model.layers.{bid}.attention.wk", # internlm2
143
- "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
156
+ "transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
144
157
  ),
145
158
 
146
159
  # Attention value
@@ -158,7 +171,7 @@ class TensorNameMap:
158
171
  # Attention output
159
172
  MODEL_TENSOR.ATTN_OUT: (
160
173
  "gpt_neox.layers.{bid}.attention.dense", # gptneox
161
- "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
174
+ "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
162
175
  "transformer.blocks.{bid}.attn.out_proj", # mpt
163
176
  "transformer.h.{bid}.self_attention.dense", # falcon
164
177
  "h.{bid}.self_attention.dense", # bloom
@@ -175,6 +188,8 @@ class TensorNameMap:
175
188
  "encoder.layers.{bid}.attn.out_proj", # nomic-bert
176
189
  "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
177
190
  "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
191
+ "encoder.layers.{bid}.self_attention.dense", # chatglm
192
+ "transformer.layers.{bid}.attn.out_proj", # openelm
178
193
  ),
179
194
 
180
195
  # Attention output norm
@@ -200,7 +215,7 @@ class TensorNameMap:
200
215
  # Feed-forward norm
201
216
  MODEL_TENSOR.FFN_NORM: (
202
217
  "gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
203
- "transformer.h.{bid}.ln_2", # gpt2 refact qwen
218
+ "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
204
219
  "h.{bid}.post_attention_layernorm", # bloom
205
220
  "transformer.blocks.{bid}.norm_2", # mpt
206
221
  "model.layers.{bid}.post_attention_layernorm", # llama-hf
@@ -210,6 +225,8 @@ class TensorNameMap:
210
225
  "h.{bid}.ln_2", # gpt2
211
226
  "model.layers.{bid}.ffn_norm", # internlm2
212
227
  "transformer.decoder_layer.{bid}.rms_norm_2", # Grok
228
+ "encoder.layers.{bid}.post_attention_layernorm", # chatglm
229
+ "transformer.layers.{bid}.ffn_norm", # openelm
213
230
  ),
214
231
 
215
232
  # Post feed-forward norm
@@ -237,7 +254,7 @@ class TensorNameMap:
237
254
  # Feed-forward up
238
255
  MODEL_TENSOR.FFN_UP: (
239
256
  "gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
240
- "transformer.h.{bid}.mlp.c_fc", # gpt2
257
+ "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
241
258
  "transformer.blocks.{bid}.ffn.up_proj", # mpt
242
259
  "transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
243
260
  "h.{bid}.mlp.dense_h_to_4h", # bloom
@@ -259,6 +276,7 @@ class TensorNameMap:
259
276
  "model.layers.{bid}.mlp.c_fc", # starcoder2
260
277
  "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
261
278
  "model.layers.{bid}.residual_mlp.w3", # arctic
279
+ "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
262
280
  ),
263
281
 
264
282
  MODEL_TENSOR.FFN_UP_EXP: (
@@ -270,6 +288,7 @@ class TensorNameMap:
270
288
 
271
289
  MODEL_TENSOR.FFN_UP_SHEXP: (
272
290
  "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
291
+ "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
273
292
  ),
274
293
 
275
294
  # AWQ-activation gate
@@ -282,6 +301,7 @@ class TensorNameMap:
282
301
  "model.layers.{bid}.mlp.gate_proj", # llama-hf refact
283
302
  "layers.{bid}.feed_forward.w1", # llama-pth
284
303
  "transformer.h.{bid}.mlp.w2", # qwen
304
+ "transformer.h.{bid}.mlp.c_fc2", # jais
285
305
  "model.layers.layers.{bid}.mlp.gate_proj", # plamo
286
306
  "model.layers.{bid}.feed_forward.w1", # internlm2
287
307
  "encoder.layers.{bid}.mlp.fc12", # nomic-bert
@@ -299,12 +319,13 @@ class TensorNameMap:
299
319
 
300
320
  MODEL_TENSOR.FFN_GATE_SHEXP: (
301
321
  "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
322
+ "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
302
323
  ),
303
324
 
304
325
  # Feed-forward down
305
326
  MODEL_TENSOR.FFN_DOWN: (
306
327
  "gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
307
- "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
328
+ "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
308
329
  "transformer.blocks.{bid}.ffn.down_proj", # mpt
309
330
  "transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
310
331
  "h.{bid}.mlp.dense_4h_to_h", # bloom
@@ -322,7 +343,10 @@ class TensorNameMap:
322
343
  "encoder.layers.{bid}.mlp.fc2", # nomic-bert
323
344
  "model.layers.{bid}.mlp.c_proj", # starcoder2
324
345
  "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
346
+ "transformer.layers.{bid}.ffn.proj_2", # openelm
325
347
  "model.layers.{bid}.residual_mlp.w2", # arctic
348
+ "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
349
+ "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
326
350
  ),
327
351
 
328
352
  MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -334,6 +358,7 @@ class TensorNameMap:
334
358
 
335
359
  MODEL_TENSOR.FFN_DOWN_SHEXP: (
336
360
  "model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
361
+ "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
337
362
  ),
338
363
 
339
364
  MODEL_TENSOR.ATTN_Q_NORM: (
@@ -341,7 +366,8 @@ class TensorNameMap:
341
366
  "model.layers.{bid}.self_attn.q_layernorm", # persimmon
342
367
  "model.layers.{bid}.self_attn.q_norm", # cohere
343
368
  "transformer.blocks.{bid}.attn.q_ln", # sea-lion
344
- "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
369
+ "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
370
+ "transformer.layers.{bid}.attn.q_norm", # openelm
345
371
  ),
346
372
 
347
373
  MODEL_TENSOR.ATTN_K_NORM: (
@@ -349,7 +375,8 @@ class TensorNameMap:
349
375
  "model.layers.{bid}.self_attn.k_layernorm", # persimmon
350
376
  "model.layers.{bid}.self_attn.k_norm", # cohere
351
377
  "transformer.blocks.{bid}.attn.k_ln", # sea-lion
352
- "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
378
+ "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
379
+ "transformer.layers.{bid}.attn.k_norm", # openelm
353
380
  ),
354
381
 
355
382
  MODEL_TENSOR.ROPE_FREQS: (
@@ -361,6 +388,7 @@ class TensorNameMap:
361
388
  "encoder.layers.{bid}.norm2", # nomic-bert
362
389
  "transformer.decoder_layer.{bid}.rms_norm_3", # Grok
363
390
  "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
391
+ "encoder.layer.{bid}.layer_norm_2" # jina-v2-code
364
392
  ),
365
393
 
366
394
  MODEL_TENSOR.SSM_IN: (
@@ -397,6 +425,152 @@ class TensorNameMap:
397
425
  "model.layers.{bid}.out_proj",
398
426
  "backbone.layers.{bid}.mixer.out_proj",
399
427
  ),
428
+
429
+ MODEL_TENSOR.ATTN_Q_A: (
430
+ "model.layers.{bid}.self_attn.q_a_proj", # deepseek2
431
+ ),
432
+
433
+ MODEL_TENSOR.ATTN_Q_B: (
434
+ "model.layers.{bid}.self_attn.q_b_proj", # deepseek2
435
+ ),
436
+
437
+ MODEL_TENSOR.ATTN_KV_A_MQA: (
438
+ "model.layers.{bid}.self_attn.kv_a_proj_with_mqa", # deepseek2
439
+ ),
440
+
441
+ MODEL_TENSOR.ATTN_KV_B: (
442
+ "model.layers.{bid}.self_attn.kv_b_proj", # deepseek2
443
+ ),
444
+
445
+ MODEL_TENSOR.ATTN_Q_A_NORM: (
446
+ "model.layers.{bid}.self_attn.q_a_layernorm", # deepseek2
447
+ ),
448
+
449
+ MODEL_TENSOR.ATTN_KV_A_NORM: (
450
+ "model.layers.{bid}.self_attn.kv_a_layernorm", # deepseek2
451
+ ),
452
+
453
+ MODEL_TENSOR.ATTN_SUB_NORM: (
454
+ "model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
455
+ ),
456
+
457
+ MODEL_TENSOR.FFN_SUB_NORM: (
458
+ "model.layers.{bid}.mlp.ffn_layernorm", # bitnet
459
+ ),
460
+
461
+ MODEL_TENSOR.DEC_ATTN_NORM: (
462
+ "decoder.block.{bid}.layer.0.layer_norm", # t5
463
+ ),
464
+
465
+ MODEL_TENSOR.DEC_ATTN_Q: (
466
+ "decoder.block.{bid}.layer.0.SelfAttention.q", # t5
467
+ ),
468
+
469
+ MODEL_TENSOR.DEC_ATTN_K: (
470
+ "decoder.block.{bid}.layer.0.SelfAttention.k", # t5
471
+ ),
472
+
473
+ MODEL_TENSOR.DEC_ATTN_V: (
474
+ "decoder.block.{bid}.layer.0.SelfAttention.v", # t5
475
+ ),
476
+
477
+ MODEL_TENSOR.DEC_ATTN_OUT: (
478
+ "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
479
+ ),
480
+
481
+ MODEL_TENSOR.DEC_ATTN_REL_B: (
482
+ "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
483
+ ),
484
+
485
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
486
+ "decoder.block.{bid}.layer.1.layer_norm", # t5
487
+ ),
488
+
489
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
490
+ "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
491
+ ),
492
+
493
+ MODEL_TENSOR.DEC_CROSS_ATTN_K: (
494
+ "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
495
+ ),
496
+
497
+ MODEL_TENSOR.DEC_CROSS_ATTN_V: (
498
+ "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
499
+ ),
500
+
501
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
502
+ "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
503
+ ),
504
+
505
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
506
+ "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
507
+ ),
508
+
509
+ MODEL_TENSOR.DEC_FFN_NORM: (
510
+ "decoder.block.{bid}.layer.2.layer_norm", # t5
511
+ ),
512
+
513
+ MODEL_TENSOR.DEC_FFN_GATE: (
514
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
515
+ ),
516
+
517
+ MODEL_TENSOR.DEC_FFN_UP: (
518
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
519
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
520
+ ),
521
+
522
+ MODEL_TENSOR.DEC_FFN_DOWN: (
523
+ "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
524
+ ),
525
+
526
+ MODEL_TENSOR.DEC_OUTPUT_NORM: (
527
+ "decoder.final_layer_norm", # t5
528
+ ),
529
+
530
+ MODEL_TENSOR.ENC_ATTN_NORM: (
531
+ "encoder.block.{bid}.layer.0.layer_norm", # t5
532
+ ),
533
+
534
+ MODEL_TENSOR.ENC_ATTN_Q: (
535
+ "encoder.block.{bid}.layer.0.SelfAttention.q", # t5
536
+ ),
537
+
538
+ MODEL_TENSOR.ENC_ATTN_K: (
539
+ "encoder.block.{bid}.layer.0.SelfAttention.k", # t5
540
+ ),
541
+
542
+ MODEL_TENSOR.ENC_ATTN_V: (
543
+ "encoder.block.{bid}.layer.0.SelfAttention.v", # t5
544
+ ),
545
+
546
+ MODEL_TENSOR.ENC_ATTN_OUT: (
547
+ "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
548
+ ),
549
+
550
+ MODEL_TENSOR.ENC_ATTN_REL_B: (
551
+ "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
552
+ ),
553
+
554
+ MODEL_TENSOR.ENC_FFN_NORM: (
555
+ "encoder.block.{bid}.layer.1.layer_norm", # t5
556
+ ),
557
+
558
+ MODEL_TENSOR.ENC_FFN_GATE: (
559
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
560
+ ),
561
+
562
+ MODEL_TENSOR.ENC_FFN_UP: (
563
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
564
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
565
+ ),
566
+
567
+ MODEL_TENSOR.ENC_FFN_DOWN: (
568
+ "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
569
+ ),
570
+
571
+ MODEL_TENSOR.ENC_OUTPUT_NORM: (
572
+ "encoder.final_layer_norm", # t5
573
+ ),
400
574
  }
401
575
 
402
576
  # architecture-specific block mappings
@@ -428,14 +602,12 @@ class TensorNameMap:
428
602
  for tensor, keys in self.block_mappings_cfg.items():
429
603
  if tensor not in MODEL_TENSORS[arch]:
430
604
  continue
431
- # TODO: make this configurable
432
- n_experts = 128
433
- for xid in range(n_experts):
434
- tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
435
- self.mapping[tensor_name] = (tensor, tensor_name)
436
- for key in keys:
437
- key = key.format(bid = bid, xid = xid)
438
- self.mapping[key] = (tensor, tensor_name)
605
+
606
+ tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
607
+ self.mapping[tensor_name] = (tensor, tensor_name)
608
+ for key in keys:
609
+ key = key.format(bid = bid)
610
+ self.mapping[key] = (tensor, tensor_name)
439
611
 
440
612
  def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
441
613
  result = self.mapping.get(key)
@@ -474,4 +646,4 @@ class TensorNameMap:
474
646
 
475
647
 
476
648
  def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
477
- return TensorNameMap(arch, n_blocks)
649
+ return TensorNameMap(arch, n_blocks)
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/common.lib CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/gguf.exe CHANGED
Binary file
bigdl/cpp/libs/gritlm.exe CHANGED
Binary file
Binary file
bigdl/cpp/libs/infill.exe CHANGED
Binary file
Binary file
bigdl/cpp/libs/llama.dll CHANGED
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/lookup.exe CHANGED
Binary file
Binary file
bigdl/cpp/libs/main.exe CHANGED
Binary file
bigdl/cpp/libs/ollama.exe CHANGED
Binary file
Binary file
Binary file
Binary file
bigdl/cpp/libs/q8dot.exe CHANGED
Binary file
Binary file
Binary file