ipex-llm 2.2.0b20250102__py3-none-win_amd64.whl → 2.2.0b20250104__py3-none-win_amd64.whl

This diff compares two publicly released versions of the package as they appear in their respective public registries and is provided for informational purposes only.
Files changed (43)
  1. ipex_llm/libs/bloom-api.dll +0 -0
  2. ipex_llm/libs/bloom.dll +0 -0
  3. ipex_llm/libs/gptneox-api.dll +0 -0
  4. ipex_llm/libs/gptneox.dll +0 -0
  5. ipex_llm/libs/libbloom_avx.dll +0 -0
  6. ipex_llm/libs/libbloom_vnni.dll +0 -0
  7. ipex_llm/libs/libgptneox_avx.dll +0 -0
  8. ipex_llm/libs/libgptneox_vnni.dll +0 -0
  9. ipex_llm/libs/libllama_avx.dll +0 -0
  10. ipex_llm/libs/libllama_vnni.dll +0 -0
  11. ipex_llm/libs/libstarcoder_avx.dll +0 -0
  12. ipex_llm/libs/libstarcoder_vnni.dll +0 -0
  13. ipex_llm/libs/llama-api.dll +0 -0
  14. ipex_llm/libs/llama.dll +0 -0
  15. ipex_llm/libs/main-bloom.exe +0 -0
  16. ipex_llm/libs/main-gptneox.exe +0 -0
  17. ipex_llm/libs/main-llama.exe +0 -0
  18. ipex_llm/libs/main-starcoder.exe +0 -0
  19. ipex_llm/libs/pipeline.dll +0 -0
  20. ipex_llm/libs/quantize-bloom.exe +0 -0
  21. ipex_llm/libs/quantize-bloom_vnni.exe +0 -0
  22. ipex_llm/libs/quantize-gptneox.exe +0 -0
  23. ipex_llm/libs/quantize-gptneox_vnni.exe +0 -0
  24. ipex_llm/libs/quantize-llama.exe +0 -0
  25. ipex_llm/libs/quantize-llama_vnni.exe +0 -0
  26. ipex_llm/libs/quantize-starcoder.exe +0 -0
  27. ipex_llm/libs/quantize-starcoder_vnni.exe +0 -0
  28. ipex_llm/libs/starcoder-api.dll +0 -0
  29. ipex_llm/libs/starcoder.dll +0 -0
  30. ipex_llm/transformers/convert.py +3 -10
  31. ipex_llm/transformers/models/yuan.py +2 -50
  32. ipex_llm/transformers/xpu_ops.py +155 -0
  33. ipex_llm/utils/__init__.py +1 -2
  34. ipex_llm/utils/benchmark_util_4_47.py +4907 -0
  35. ipex_llm/vllm/xpu/model_convert.py +2 -0
  36. {ipex_llm-2.2.0b20250102.dist-info → ipex_llm-2.2.0b20250104.dist-info}/METADATA +19 -19
  37. {ipex_llm-2.2.0b20250102.dist-info → ipex_llm-2.2.0b20250104.dist-info}/RECORD +43 -41
  38. {ipex_llm-2.2.0b20250102.data → ipex_llm-2.2.0b20250104.data}/scripts/ipex-llm-init.bat +0 -0
  39. {ipex_llm-2.2.0b20250102.data → ipex_llm-2.2.0b20250104.data}/scripts/llm-chat.ps1 +0 -0
  40. {ipex_llm-2.2.0b20250102.data → ipex_llm-2.2.0b20250104.data}/scripts/llm-cli.ps1 +0 -0
  41. {ipex_llm-2.2.0b20250102.dist-info → ipex_llm-2.2.0b20250104.dist-info}/WHEEL +0 -0
  42. {ipex_llm-2.2.0b20250102.dist-info → ipex_llm-2.2.0b20250104.dist-info}/entry_points.txt +0 -0
  43. {ipex_llm-2.2.0b20250102.dist-info → ipex_llm-2.2.0b20250104.dist-info}/top_level.txt +0 -0
ipex_llm/libs/*.dll, ipex_llm/libs/*.exe CHANGED
Binary files changed (items 1-29 in the list above); binary contents are not shown.
ipex_llm/transformers/convert.py CHANGED
@@ -1984,16 +1984,9 @@ def _optimize_post(model):
         modeling_module_name = model.__class__.__module__
         module = importlib.import_module(modeling_module_name)
         from ipex_llm.transformers.models.yuan import yuan_attention_forward
-        # from ipex_llm.transformers.models.yuan import yuan_mlp_forward
-        convert_forward(model,
-                        module.YuanAttention,
-                        yuan_attention_forward
-                        )
-        # disable able mlp_forward for quantize_kv on mtl.
-        # convert_forward(model,
-        #                 module.YuanMLP,
-        #                 yuan_mlp_forward
-        #                 )
+        convert_forward(model, module.YuanAttention, yuan_attention_forward)
+        # from ipex_llm.transformers.models.common import mlp_silu_forward
+        # convert_forward(model, module.YuanMLP, mlp_silu_forward)
     elif model.config.model_type == 'bert' and (
         not model.config.is_decoder and
         model.config.position_embedding_type == "absolute"
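For readers unfamiliar with the helper used in this hunk: convert_forward is ipex-llm's utility for swapping the forward method of every matching submodule in a loaded model. The standalone sketch below illustrates that patching pattern under the assumption that it simply rebinds forward on each instance of the target class; it is not the library's implementation.

import types
import torch.nn as nn


def convert_forward_sketch(model: nn.Module, target_cls: type, new_forward) -> None:
    # Walk the module tree and rebind `forward` on every instance of target_cls.
    for module in model.modules():
        if isinstance(module, target_cls):
            module.forward = types.MethodType(new_forward, module)


# Usage mirroring the hunk above (names come from the Yuan modeling module):
# convert_forward_sketch(model, module.YuanAttention, yuan_attention_forward)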
ipex_llm/transformers/models/yuan.py CHANGED
@@ -20,17 +20,15 @@
 # https://huggingface.co/IEITYuan/Yuan2-2B-hf/blob/7ab7b3c18eb8e5232ce2a3f720d4e6f4b53a2806/README.md#%E5%A3%B0%E6%98%8E%E4%B8%8E%E5%8D%8F%E8%AE%AEterms-and-conditions
 #
 
-import math
 from typing import Optional, Tuple
 
 import torch
 
 from ipex_llm.utils.common import invalidInputError
 from ipex_llm.transformers.models.common import scaled_dot_product_attention
-from ipex_llm.transformers.models.utils import apply_rotary_pos_emb, \
-    mlp_fusion_check, fp16_fusion_check
+from ipex_llm.transformers.models.utils import apply_rotary_pos_emb
 from ipex_llm.transformers.models.utils import use_quantize_kv_cache
-from ipex_llm.transformers.models.utils import SILU, update_past_key_value
+from ipex_llm.transformers.models.utils import update_past_key_value
 from ipex_llm.transformers.models.utils import should_use_fuse_rope
 
 
@@ -98,52 +96,6 @@ def yuan_localized_filtering_forward(
     return lf_output
 
 
-def yuan_mlp_forward(
-    self,
-    x: torch.Tensor,
-    residual=None
-) -> torch.Tensor:
-    x_2d = x.view(-1, x.shape[-1])
-    bsz, hidden_size = x_2d.shape
-    qtype = getattr(self.up_proj, "qtype", None)
-    if mlp_fusion_check(x_2d, qtype, self.training):
-        import xe_linear
-        if not x_2d.is_contiguous():
-            x_2d = x_2d.contiguous()
-        out = self.down_proj(xe_linear.mlp_forward_xpu(
-            x_2d, self.up_proj.weight.data, self.gate_proj.weight.data,
-            x_2d.shape[0], x_2d.shape[1], self.up_proj.out_len,
-            SILU, qtype
-        ))
-        if residual is not None:
-            return out + residual
-        else:
-            return out
-    elif fp16_fusion_check(self.up_proj, x, self.training) and \
-            hidden_size == 4096 and bsz == 1:
-        hidden_states1 = torch.ops.torch_ipex.mm_silu(x, self.up_proj.weight)
-        hidden_states = torch.ops.torch_ipex.mm_resmul(
-            x, self.gate_proj.weight, hidden_states1
-        )
-        if residual is None:
-            hidden_states = torch.matmul(hidden_states, self.down_proj.weight)
-        else:
-            attn_output = torch.addmm(
-                residual.flatten(0, -2),
-                hidden_states.flatten(0, -2),
-                self.down_proj.weight,
-                beta=1,
-            )
-            hidden_states = attn_output.view(x.shape)
-        return hidden_states
-    else:
-        out = self.down_proj(self.act_fn(self.up_proj(x)) * self.gate_proj(x))
-        if residual is not None:
-            return out + residual
-        else:
-            return out
-
-
 def yuan_attention_forward(
     self,
     hidden_states: torch.Tensor,
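The deleted yuan_mlp_forward chose between a fused xe_linear path, a torch_ipex fp16 path, and an unfused fallback; the replacement hinted at in convert.py is the shared mlp_silu_forward in models/common. As a point of reference, the unfused fallback is the standard SwiGLU-style gated MLP sketched below (module and attribute names follow the Yuan code quoted above; the class itself is illustrative).

import torch
import torch.nn as nn


class GatedMLPSketch(nn.Module):
    """Unfused gated MLP matching the removed fallback branch."""

    def __init__(self, hidden_size: int, intermediate_size: int):
        super().__init__()
        self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
        self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
        self.act_fn = nn.SiLU()

    def forward(self, x: torch.Tensor, residual: torch.Tensor = None) -> torch.Tensor:
        # Same math as the removed fallback:
        # down_proj(act_fn(up_proj(x)) * gate_proj(x)), plus an optional residual add.
        out = self.down_proj(self.act_fn(self.up_proj(x)) * self.gate_proj(x))
        return out if residual is None else out + residual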
ipex_llm/transformers/xpu_ops.py ADDED
@@ -0,0 +1,155 @@
+#
+# Copyright 2016 The BigDL Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import torch
+import xe_linear
+import xe_batch
+import xe_addons
+
+
+@torch.library.register_fake("ipex_llm::forward_new")
+def _(x, weight, qtype, input_size):
+    return torch.empty_like(x)
+
+
+# @torch.library.register_fake("ipex_llm::dequant")
+# def _(x, weight, qtype):
+#     return ???
+
+
+@torch.library.register_fake("ipex_llm::mlp_forward_xpu")
+def _(x, weight1, weight2, batch_size, state_size, output_size, act_type, qtype):
+    return torch.empty_like(x)
+
+
+# @torch.library.register_fake("ipex_llm::rwkv_linear_attention_v4")
+# def _(time_decay, time_first, key, value, num_state, den_state, max_state)
+#     return ???
+
+
+# @torch.library.register_fake("ipex_llm::rwkv_linear_attention_v5")
+# def _(time_decay, time_first, receptance, key, value, state)
+#     return ???
+
+
+# @torch.library.register_fake("ipex_llm::rwkv_time_shift")
+# def _(hidden, shifted, mix):
+#     return ???
+
+
+# @torch.library.register_fake("ipex_llm::dequantize_rows")
+# def _(x, weight, qtype, state_size, output_size):
+#     return ???
+
+
+@torch.library.register_fake("ipex_llm::batch_forward")
+def _(x, weight, qtype):
+    return torch.empty_like(x)
+
+
+@torch.library.register_fake("ipex_llm::sdp")
+def _(query, key, value, mask):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::sdp_fp8")
+def _(query, key, value, mask):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::sdp_causal")
+def _(query, key, value, mask, scale):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::sdp_fp8_causal")
+def _(query, key, value, mask, scale):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::sdp_non_causal")
+def _(query, key, value, mask, scale):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::sdp_fp8_non_causal")
+def _(query, key, value, mask, scale):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::siglip_sdp_non_causal")
+def _(query, key, value, mask):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::gemma2_sdp")
+def _(query, key, value, mask, f1, f2):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::gemma2_sdp_causal")
+def _(query, key, value, mask, f1, f2):
+    return torch.empty(query.shape, dtype=query.dtype, device=query.device)
+
+
+@torch.library.register_fake("ipex_llm::rms_norm")
+def _(weight, x, eps):
+    return torch.empty_like(x)
+
+
+@torch.library.register_fake("ipex_llm::layer_norm")
+def _(x, weight, bias, eps):
+    return torch.empty_like(x)
+
+
+@torch.library.register_fake("ipex_llm::rotary_half_inplaced")
+def _(inv_freq, position_ids, query, key):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::rotary_two_inplaced")
+def _(inv_freq, position_ids, query, key):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::rotary_half_with_cache_inplaced")
+def _(query, key, cos, sin):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::rotary_two_with_cache_inplaced")
+def _(query, key, cos, sin, half_layout):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::mlp_silu_mul_inplaced")
+def _(gate, up):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::quantize_key_value")
+def _(key, value, key_output, value_output):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::dequantize_key_value")
+def _(key, value, key_output, value_output):
+    pass
+
+
+@torch.library.register_fake("ipex_llm::attn_softmax_inplaced")
+def _(attn):
+    pass
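The new xpu_ops.py registers fake (meta) implementations for ipex-llm's custom XPU operators with torch.library.register_fake, so that FakeTensor tracing (for example under torch.compile) can propagate output shapes and dtypes without executing the real xe_linear/xe_batch/xe_addons kernels. The self-contained sketch below shows the same mechanism on a hypothetical op; mylib::add_one is invented here for illustration and requires PyTorch 2.4 or newer.

import torch


# A toy custom op with a real (eager) implementation.
@torch.library.custom_op("mylib::add_one", mutates_args=())
def add_one(x: torch.Tensor) -> torch.Tensor:
    return x + 1


# The fake implementation only describes the output's metadata (shape/dtype),
# which is exactly what the registrations in xpu_ops.py provide for each op.
@torch.library.register_fake("mylib::add_one")
def _(x):
    return torch.empty_like(x)


if __name__ == "__main__":
    print(torch.ops.mylib.add_one(torch.zeros(3)))  # tensor([1., 1., 1.])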
ipex_llm/utils/__init__.py CHANGED
@@ -23,8 +23,7 @@ import transformers
 trans_version = transformers.__version__
 
 if trans_version >= "4.47.0":
-    # TODO
-    pass
+    from .benchmark_util_4_47 import BenchmarkWrapper
 elif trans_version >= "4.45.0":
     from .benchmark_util_4_45 import BenchmarkWrapper
 elif trans_version >= "4.44.0":
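The gate above compares transformers.__version__ as a plain string, which happens to order the 4.4x releases correctly but is fragile in general (for example, "4.5.0" sorts after "4.47.0" as a string). A PEP 440-aware alternative using packaging.version is sketched below; this is only an illustration, not the package's code, and it assumes the packaging distribution is installed.

from packaging.version import Version
import transformers

# String comparison misorders these versions; Version() does not.
assert "4.5.0" > "4.47.0"
assert Version("4.5.0") < Version("4.47.0")

# The same gate rewritten with Version objects (remaining branches omitted):
trans_version = Version(transformers.__version__)
if trans_version >= Version("4.47.0"):
    from ipex_llm.utils.benchmark_util_4_47 import BenchmarkWrapper
elif trans_version >= Version("4.45.0"):
    from ipex_llm.utils.benchmark_util_4_45 import BenchmarkWrapper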